From b9352104b063ba06dd6dba23b46a9139a7c758df Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Fri, 27 Jun 2008 12:38:16 +0300 Subject: Import bomstrip_9.orig.tar.gz [dgit import orig bomstrip_9.orig.tar.gz] --- BomStrip.java | 37 ++++++++++++++++++++++ bom0 | 1 + bom1 | 1 + bomstrip.awk | 6 ++++ bomstrip.b | 43 ++++++++++++++++++++++++++ bomstrip.bf | 9 ++++++ bomstrip.c | 37 ++++++++++++++++++++++ bomstrip.cpp | 49 +++++++++++++++++++++++++++++ bomstrip.fs | 45 +++++++++++++++++++++++++++ bomstrip.hs | 9 ++++++ bomstrip.ocaml | 15 +++++++++ bomstrip.ook | 60 ++++++++++++++++++++++++++++++++++++ bomstrip.pas | 67 ++++++++++++++++++++++++++++++++++++++++ bomstrip.php | 12 ++++++++ bomstrip.pl | 8 +++++ bomstrip.ps | 59 +++++++++++++++++++++++++++++++++++ bomstrip.py | 45 +++++++++++++++++++++++++++ bomstrip.rb | 7 +++++ bomstrip.sed | 20 ++++++++++++ bomstrip.unl | 67 ++++++++++++++++++++++++++++++++++++++++ bomstrip2.pl | 3 ++ bomstrip_expl.bf | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ correct/rbom0 | 0 correct/rbom1 | 1 + correct/rnobom0 | 0 correct/rnobom1 | 1 + correct/rnobom2 | 1 + nobom0 | 0 nobom1 | 1 + nobom2 | 1 + test.sh | 16 ++++++++++ 31 files changed, 714 insertions(+) create mode 100644 BomStrip.java create mode 100644 bom0 create mode 100644 bom1 create mode 100755 bomstrip.awk create mode 100755 bomstrip.b create mode 100755 bomstrip.bf create mode 100755 bomstrip.c create mode 100755 bomstrip.cpp create mode 100755 bomstrip.fs create mode 100755 bomstrip.hs create mode 100755 bomstrip.ocaml create mode 100755 bomstrip.ook create mode 100755 bomstrip.pas create mode 100755 bomstrip.php create mode 100755 bomstrip.pl create mode 100755 bomstrip.ps create mode 100755 bomstrip.py create mode 100755 bomstrip.rb create mode 100755 bomstrip.sed create mode 100755 bomstrip.unl create mode 100755 bomstrip2.pl create mode 100755 bomstrip_expl.bf create mode 100644 correct/rbom0 create mode 100644 correct/rbom1 create mode 100644 
correct/rnobom0 create mode 100644 correct/rnobom1 create mode 100644 correct/rnobom2 create mode 100644 nobom0 create mode 100644 nobom1 create mode 100644 nobom2 create mode 100755 test.sh diff --git a/BomStrip.java b/BomStrip.java new file mode 100644 index 0000000..416ead9 --- /dev/null +++ b/BomStrip.java @@ -0,0 +1,37 @@ +public class BomStrip { + + private static final int BUFFER_LENGTH = 65536; + private static final int EOF = -1; + private static final byte UTF8_BOM_1 = (byte) 0xef; + private static final byte UTF8_BOM_2 = (byte) 0xbb; + private static final byte UTF8_BOM_3 = (byte) 0xbf; + + + public static void main(String[] args) { + final byte[] utf8Bom = {UTF8_BOM_1, UTF8_BOM_2, UTF8_BOM_3}; + byte[] buffer = new byte[BUFFER_LENGTH]; + byte[] bomBuffer = new byte[utf8Bom.length]; + + try { + int nRead = System.in.read(bomBuffer, 0, bomBuffer.length); + if (nRead != EOF) { + if (!java.util.Arrays.equals(bomBuffer, utf8Bom)) { + System.out.write(bomBuffer, 0, nRead); + } + boolean eof = false; + while (!eof) { + nRead = System.in.read(buffer, 0, buffer.length); + eof = nRead == EOF; + if (!eof) { + System.out.write(buffer, 0, nRead); + } + } + } + } + catch (java.io.IOException e) + { + System.err.println("I/O error occurred: " + e.getMessage()); + System.exit(1); + } + } +} diff --git a/bom0 b/bom0 new file mode 100644 index 0000000..5f28270 --- /dev/null +++ b/bom0 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/bom1 b/bom1 new file mode 100644 index 0000000..a3e4ba3 --- /dev/null +++ b/bom1 @@ -0,0 +1 @@ +blieb \ No newline at end of file diff --git a/bomstrip.awk b/bomstrip.awk new file mode 100755 index 0000000..290a6d9 --- /dev/null +++ b/bomstrip.awk @@ -0,0 +1,6 @@ +#!/usr/bin/awk -f +# by Peter Pentchev, 2008, public domain. +# does not work on the one true awk, does work on the gnu clone. but still then fails if file does not end in newline. 
+ +NR == 1 && /^/ { sub("^...", "") } +{ print } diff --git a/bomstrip.b b/bomstrip.b new file mode 100755 index 0000000..aab1bfa --- /dev/null +++ b/bomstrip.b @@ -0,0 +1,43 @@ +implement Bomstrip; + +include "sys.m"; +include "draw.m"; + +Bomstrip: module +{ + init: fn(ctxt: ref Draw->Context, argv: list of string); +}; + +init(nil: ref Draw->Context, nil: list of string) +{ + sys := load Sys Sys->PATH; + buf := array[sys->ATOMICIO] of byte; + utf8bom := array[3] of {byte 16rEF, byte 16rBB, byte 16rBF}; + stdin := sys->fildes(0); + stdout := sys->fildes(1); + + n := sys->read(stdin, buf, len buf); + if(n < 0) + raise "fail:read"; + if(n == 0) + return; + m := 3; + if(n < m) + m = n; + if(m != 3 + || buf[0] != utf8bom[0] + || buf[1] != utf8bom[1] + || buf[2] != utf8bom[2]) + sys->write(stdout, buf[0:3], m); + if(n > 3) + sys->write(stdout, buf[3:n], n-3); + + for(;;) { + n = sys->read(stdin, buf, len buf); + if(n < 0) + raise "fail:read"; + if(n == 0) + break; + sys->write(stdout, buf, n); + } +} diff --git a/bomstrip.bf b/bomstrip.bf new file mode 100755 index 0000000..83db841 --- /dev/null +++ b/bomstrip.bf @@ -0,0 +1,9 @@ +,+[-[>>+<+<-]>>>>>>+++++[<++++>-]<[<++++>-]<[<+++>-]<-[<->-]+<[>-> +>>++++[<++++[<++++[<++++[<+>-]>-]>-]>-]<<<<[-]]>[-,+[-[>>+<+<-]>>> +>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<--[<->-]+<[>->>>++++[<++++[<+ ++++[<++++[<+>-]>-]>-]>-]<<<<[-]]>[-,+[-[>>+<+<-]>>>>>>+++++++[<+++ +>-]<[<+++>-]<[<+++>-]<++[<->-]+<[>->>>++++[<++++[<++++[<++++[<+>-] +>-]>-]>-]<<<<[-]]>[-<<[-]<<<[-]<<<[-]>>>>>>>>]<<<]]<<<]]<<<]>[.>]> +>[.>]>>[.>],+[-.,+] + +Berteun Damman; 2005; Public Domain; diff --git a/bomstrip.c b/bomstrip.c new file mode 100755 index 0000000..6953414 --- /dev/null +++ b/bomstrip.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include + +void +usage(char *prog) +{ + fprintf(stderr, "usage: %s\n", prog); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + size_t nread; + char buf[65536]; + char *utf8bom = "\xef\xbb\xbf"; + + if (argc > 1) + 
usage(argv[0]); + + nread = fread(buf, 1, strlen(utf8bom), stdin); + if (nread == 0) + return 0; + if (strcmp(buf, utf8bom) != 0) + fwrite(buf, 1, nread, stdout); + for (;;) { + nread = fread(buf, 1, sizeof buf, stdin); + if (nread < 0) + exit(1); + if (nread == 0) + return 0; + fwrite(buf, 1, nread, stdout); + } + return 0; +} diff --git a/bomstrip.cpp b/bomstrip.cpp new file mode 100755 index 0000000..51ca93c --- /dev/null +++ b/bomstrip.cpp @@ -0,0 +1,49 @@ +/* by Peter Pentchev, 2008, public domain. */ + +#include +#include +#include + +using namespace std; + +const char *utf8bom = "\xef\xbb\xbf"; + +static void usage(const char *); +static void outendl(string &); + +static void +usage(const char *prog) +{ + cerr << "usage: " << prog << endl; + exit(1); +} + +static void +outendl(string &s) +{ + cout << s; + if (!cin.eof()) + cout << endl; +} + +int +main(int argc, const char * const argv[]) +{ + string s; + + if (argc > 1) + usage(argv[0]); + + /* Empty? */ + if (!getline(cin, s)) + return 0; + + /* First line... */ + if (!s.substr(0, 3).compare(utf8bom)) + s = s.substr(3); + outendl(s); + + /* ...and the rest. */ + while (getline(cin, s)) + outendl(s); +} diff --git a/bomstrip.fs b/bomstrip.fs new file mode 100755 index 0000000..3f7d3cd --- /dev/null +++ b/bomstrip.fs @@ -0,0 +1,45 @@ +\ by Peter Pentchev, 2008, public domain. +\ +\ This works with GNU Forth. It really ought to work with other +\ Forths, too - as long as they have the STDIN and STDOUT words, +\ which at least FICL seems to be missing. + +\ The buffer where the input is read - 8KB should be enough for everyone ;) +CREATE STRIP-BUF 8192 CHARS ALLOT + +\ The UTF-8 BOM to compare to +CREATE UTF-8-BOM 239 C, 187 C, 191 C, + +\ Read three bytes, skip them if it is the BOM, output them otherwise +: STRIP-FIRST ( -- ) + ( read three bytes ) + STRIP-BUF 3 STDIN READ-FILE IF EXIT THEN + ( less than three bytes read? 
) + DUP 3 < IF + ( yep, just write them to stdout ) + STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN + ELSE + ( are they the same as the three bytes in the UTF-8-BOM? ) + STRIP-BUF SWAP 2DUP UTF-8-BOM 3 COMPARE IF + ( nope, must copy them, write them to stdout ) + STDOUT WRITE-FILE IF BYE THEN + THEN + THEN ; + +\ Read as much as we can from stdin and copy it to stdout, in 8192-byte blocks +: STRIP-REST ( -- ) + ( read up to 8KB ) + STRIP-BUF 8192 STDIN READ-FILE IF BYE THEN + ( just return on EOF ) + DUP 0= IF EXIT THEN + ( copy to stdout ) + STRIP-BUF SWAP STDOUT WRITE-FILE IF BYE THEN + ( actually equivalent to a forever loop :) + RECURSE ; + +\ First examine the first three bytes, then copy the rest +: BOMSTRIP ( -- ) + STRIP-FIRST STRIP-REST ; + +\ This is a bomstrip filter - run BOMSTRIP, then exit the interpreter +BOMSTRIP BYE diff --git a/bomstrip.hs b/bomstrip.hs new file mode 100755 index 0000000..f2de38a --- /dev/null +++ b/bomstrip.hs @@ -0,0 +1,9 @@ +-- by mechiel lukkien, 18 september 2005, public domain + +putStrNoBOM :: String -> IO () +putStrNoBOM ('\xef':'\xbb':'\xbf':s) = putStr s +putStrNoBOM s = putStr s + +main :: IO () +main = do s <- getContents + putStrNoBOM s diff --git a/bomstrip.ocaml b/bomstrip.ocaml new file mode 100755 index 0000000..e6f70d5 --- /dev/null +++ b/bomstrip.ocaml @@ -0,0 +1,15 @@ +(* by mechiel lukkien, 18 september 2005, public domain *) + +let buf = String.create 3 in +let n = input stdin buf 0 3 in +match (n, buf) with + (3, "\xef\xbb\xbf") -> () + | (n, buf) -> output stdout buf 0 n +;; + +try +while true do + output_char stdout (input_char stdin) +done +with End_of_file -> () +;; diff --git a/bomstrip.ook b/bomstrip.ook new file mode 100755 index 0000000..e963cd3 --- /dev/null +++ b/bomstrip.ook @@ -0,0 +1,60 @@ +Ook. Ook! Ook. Ook. Ook! Ook? Ook! Ook! Ook! Ook? Ook. Ook? Ook. Ook? +Ook. Ook. Ook? Ook. Ook. Ook. Ook? Ook. Ook! Ook! Ook? Ook! Ook. Ook? +Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook. Ook. 
Ook. +Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook! Ook? +Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! +Ook? Ook! Ook? Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook! Ook! Ook! Ook? Ook? Ook. +Ook! Ook! Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook. Ook? Ook. Ook! Ook? +Ook. Ook? Ook! Ook! Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? +Ook? Ook. Ook. Ook. Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook? Ook! Ook! +Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! +Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook! Ook? Ook! Ook! Ook? Ook! +Ook? Ook! Ook. Ook? Ook! Ook? Ook! Ook! Ook. Ook! Ook. Ook. Ook! Ook? +Ook! Ook! Ook! Ook? Ook. Ook? Ook. Ook? Ook. Ook. Ook? Ook. Ook. Ook. +Ook? Ook. Ook! Ook! Ook? Ook! Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? +Ook. Ook? Ook. Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook! Ook? Ook? Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook! Ook? +Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! Ook? Ook! +Ook? Ook. Ook! Ook! Ook! Ook! Ook! Ook? Ook? Ook. Ook! Ook! Ook. Ook? +Ook! Ook! Ook? Ook! Ook. Ook. Ook? Ook. Ook! Ook? Ook. Ook? Ook! Ook! +Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? +Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. +Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. +Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook? +Ook! 
Ook! Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook? Ook. +Ook? Ook. Ook? Ook. Ook! Ook? Ook! Ook! Ook? Ook! Ook? Ook! Ook. Ook? +Ook! Ook? Ook! Ook! Ook. Ook! Ook. Ook. Ook! Ook? Ook! Ook! Ook! Ook? +Ook. Ook? Ook. Ook? Ook. Ook. Ook? Ook. Ook. Ook. Ook? Ook. Ook! Ook! +Ook? Ook! Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? +Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! +Ook? Ook! Ook? Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. +Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook! Ook? Ook? Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook. Ook. +Ook. Ook. Ook! Ook? Ook? Ook. Ook! Ook! Ook. Ook? Ook! Ook! Ook? Ook! +Ook. Ook. Ook? Ook. Ook! Ook? Ook. Ook? Ook! Ook! Ook. Ook? Ook. Ook? +Ook. Ook? Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. +Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook. +Ook. Ook. Ook. Ook. Ook! Ook? Ook? Ook. Ook. Ook. Ook. Ook? Ook! Ook! +Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! Ook. Ook? Ook! Ook! Ook? Ook! +Ook. Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook. +Ook! Ook? Ook! Ook! Ook? Ook! Ook? Ook! Ook. Ook? Ook! Ook? Ook! Ook! +Ook? Ook. Ook? Ook. Ook! Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook? Ook. +Ook? Ook. Ook! Ook? Ook! Ook! Ook? Ook! Ook? Ook. Ook? Ook. Ook? Ook. +Ook! Ook? Ook! Ook! Ook? Ook! Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? +Ook. Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook? Ook! Ook? Ook. Ook? Ook. +Ook? Ook. Ook? Ook! Ook? Ook! Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook! +Ook? Ook! Ook? Ook. Ook? Ook. Ook? Ook. Ook? Ook! Ook. Ook? Ook! Ook? +Ook! Ook. Ook. Ook? Ook? Ook! Ook. Ook? Ook. Ook? Ook! Ook? Ook! Ook. +Ook. Ook? Ook? Ook! Ook. Ook? Ook. Ook? Ook! Ook? Ook! Ook. Ook. Ook? +Ook? Ook! Ook. Ook! Ook. Ook. Ook! Ook? Ook! Ook! Ook! Ook. Ook. Ook! +Ook. Ook. Ook? 
Ook! \ No newline at end of file diff --git a/bomstrip.pas b/bomstrip.pas new file mode 100755 index 0000000..705f2a2 --- /dev/null +++ b/bomstrip.pas @@ -0,0 +1,67 @@ +Program Bomstrip; +(* Berteun Damman, 2005, Public Domain + * Compiles with FreePascal and GNU Pasal (at least) and + * besides that also seems to work.... + *) + +Type + BOMT = Array[1..3] of Byte; + +Const + UTF8BOM: BOMT = ($EF, $BB, $BF); + +Var + C: Char; + F: Boolean; + I, J: Integer; + BOM: BOMT; + +Begin + (* Try to read 3 Bytes, if an EOF happens before, gracefully exit, + and print the first one or two bytes. + *) + I := 0; + While Not EOF And Not EOLn And (I <= 2) Do + Begin + Inc(I); + Read(C); + BOM[I] := Ord(C); + End; + + (* Assume we have a BOM if the input has length 3 *) + F := (I = 3); + For J := 1 To I Do + If BOM[J] <> UTF8BOM[J] Then + F := False; + + (* Not a BOM, print it. *) + If Not F Then + For J := 1 To I Do + Write(Chr(BOM[J])); + + (* If EOF, then Exit *) + If EOF Then + Exit; + + (* Print the remainder. *) + While Not EOF Do + Begin + If EOLn Then + Begin + (* Actually GPC reads an EOLn as a space. I + * do not know whether that is correct (FPC does not) + * but this works either way. + * Perhaps we do run into some implicit CR/LF <-> LF + * conversion. + * TODO Test this! + *) + WriteLn; + Read(C); + End + Else + Begin + Read(C); + Write(C); + End; + End; +End. 
diff --git a/bomstrip.php b/bomstrip.php new file mode 100755 index 0000000..bc4e1eb --- /dev/null +++ b/bomstrip.php @@ -0,0 +1,12 @@ +#!/usr/bin/env php + diff --git a/bomstrip.pl b/bomstrip.pl new file mode 100755 index 0000000..599a346 --- /dev/null +++ b/bomstrip.pl @@ -0,0 +1,8 @@ +#!/usr/bin/perl + +my $buf; +if (read STDIN, $buf, 3) { + print $buf if $buf ne "\xef\xbb\xbf"; + undef $/; + print ; +} diff --git a/bomstrip.ps b/bomstrip.ps new file mode 100755 index 0000000..eeff5be --- /dev/null +++ b/bomstrip.ps @@ -0,0 +1,59 @@ +%!PS-Adobe-2.0 +%%Title: Bomstrip +%%Author: Berteun Damman +% Run with something like: gs -q -sDEVICE=nullpage bomstrip.ps +% Public Domain, 2005 + +% Definitions +/inc { % Stack: Number + 1 add +} def + +% No graphics output +nulldevice + +(%stdin) (r) file +/input exch def +(%stdout) (w) file +/output exch def + +0 % Number of bytes read +3 { + input read { + exch + inc + } { + exit + } ifelse +} repeat +dup +% Did we read less than three bytes? +2 le { + dup 1 ge { + % Swap if needed + dup inc -1 roll exch + { output exch write } repeat + } if + quit +} if + +% We read at least three bytes, copy them for comparison +copy +16#BF 4 2 roll +16#BB 5 2 roll +16#EF 6 2 roll +true +3 { 3 1 roll eq and } repeat + +not { + % Reverse the order + exch 3 -1 roll + 3 { output exch write } repeat +} if + +% Final output loop +{ + input read + not { quit } if + output exch write +} loop diff --git a/bomstrip.py b/bomstrip.py new file mode 100755 index 0000000..c148fda --- /dev/null +++ b/bomstrip.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python2.3 + +""" +Strip leading byte-order-mark from utf-8 files. 
+""" + +import sys + +def usage(prog): + print >>sys.stderr, 'usage: %s' % prog + sys.exit(1) + +def main(prog, *args): + bufsize = 65536 + utf8bom = '\xef\xbb\xbf' + + from getopt import getopt, GetoptError + try: + opts, args = getopt(args, '') + except GetoptError: + usage(prog) + + if args: + usage(prog) + + inf = sys.stdin + outf = sys.stdout + + buf = inf.read(len(utf8bom)) + if buf != utf8bom: + outf.write(buf) + if buf == '': + return + + while True: + buf = inf.read(bufsize) + if buf == '': + break + outf.write(buf) + +if __name__ == '__main__': + try: + main(*sys.argv) + except KeyboardInterrupt: + sys.exit(1) diff --git a/bomstrip.rb b/bomstrip.rb new file mode 100755 index 0000000..67e7b0a --- /dev/null +++ b/bomstrip.rb @@ -0,0 +1,7 @@ +#!/usr/bin/env ruby +if !(buf = STDIN.read(3)).nil? && buf != "\xef\xbb\xbf" + STDOUT.write(buf) +end +while !(buf = STDIN.read(8*1024)).nil? + STDOUT.write(buf) +end diff --git a/bomstrip.sed b/bomstrip.sed new file mode 100755 index 0000000..1d6d757 --- /dev/null +++ b/bomstrip.sed @@ -0,0 +1,20 @@ +# this only works with some seds. +# lines without newline should get a newline after outputting. +# (strange, but that's how sed is supposed to work). +# actually, it seems sed input "should" always have a newline at +# the end of a file. + +# it is know to work with: +# - sed on openbsd 3.7 +# - sed on debian (probably gnu sed) + +# it is know to not work with: +# - sun os (ignores "trailing text without newline" all together) +# - mac os and plan 9 (their sed print newlines after every "line" +# of input, even when it does not have a newline). since mac os +# sed seems to come from freebsd, it is expected that freebsd sed +# also does not work. + +# too bad... 
+ +1s/^// diff --git a/bomstrip.unl b/bomstrip.unl new file mode 100755 index 0000000..5a795f1 --- /dev/null +++ b/bomstrip.unl @@ -0,0 +1,67 @@ +# BOM strip implementation in Unlambda +# +# Author: Matthijs Bomhoff +# +# This implementation is mainly constructed by composing pieces from +# http://www.madore.org/~david/programs/unlambda/ +# +# Maybe, if this was not the first program I ever wrote in Unlambda, +# it would have been nicer ;) + +` +` +` +` +` +#IF +``s`kc``s`k`s`k`k`ki``ss`k`kk # + + # Read a char and compare it to the first pattern byte + ``@?ïi + +# THEN + `d``` + #IF + ``s`kc``s`k`s`k`k`ki``ss`k`kk # + + # Read a char and compare it to the second pattern byte + ``@?»i + # THEN + + `d``` + #IF + ``s`kc``s`k`s`k`k`ki``ss`k`kk # + + # Read a char and compare it to the third pattern byte + ``@?¿i + # THEN + + i + + # ELSE + # Mismatch, print first two bytes of pattern, followed by last byte read + `d`|`.ï.» + + #END IF + + # ELSE + # Mismatch, print first byte of pattern, followed by last byte read + `d`|.ï + + #END IF + +# ELSE + # Mismatch, print last byte read + `d`|i + +# END IF + +# COPY FILE +`d +```s``sii`ki +``s``s`ks``s``s`ks``s`k`s`ki``s`k`si``s`kd``s`kk +`d`@i +k`k +`d`|i + +i # Dummy function, needed as ``input'' diff --git a/bomstrip2.pl b/bomstrip2.pl new file mode 100755 index 0000000..7cb5ad1 --- /dev/null +++ b/bomstrip2.pl @@ -0,0 +1,3 @@ +#!/usr/bin/perl -wp +# by Peter Pentchev, 2008, public domain. +$. 
== 1 && s/^\xef\xbb\xbf//; diff --git a/bomstrip_expl.bf b/bomstrip_expl.bf new file mode 100755 index 0000000..2677922 --- /dev/null +++ b/bomstrip_expl.bf @@ -0,0 +1,93 @@ +Berteun Damman; 2005; Public Domain; + +Read the first three bytes; the BOM marker (if it's there) + +The marker; in case it's there it is 0xEF 0xBB 0xBF (239 187 191) + +Read a byte of input; if it's minus 1 we treat it as an EOF +,+ [ - + So it's not an EOF and we make two copies of it + [>>+<+<-]>> + And we put our constant after it; as the number 239 itself + is a prime we use 240 which factorizes nicely as 2^4 * 3 * 5 + and substract one from it + >>>>+++++[<++++>-]<[<++++>-]<[<+++>-]<- + + While our constant is not zero we substract one from it and also + from the input byte + [<->-] + + Now change our constant to 1 (our flag) + + + Check whether the input byte is zero; if so we have a match + < [ + It was non zero we go to our flag and turn it back + to zero + >->>>++++ + + As the input byte might be negative (or wrapped! 
+ but in that case this won't hurt) + we're going to add 256 which will make it positive for sure + [<++++[<++++[<++++[<+>-]>-]>-]>-] + <<<< + [-] + ] > [ - + If we get here the flag was non zero + We now basically do the same but for the second byte + ,+ [ - + So it's not an EOF and we make two copies of it + [>>+<+<-]>> + + And we put our constant again next to it; + We first calculate 189; which is 3^3 * 7 and substract 2 + >>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<-- The number 0xBB + + + While our constant is not zero we substract one from it and also + from the input byte + + [<->-] + + Now change our constant to 1 (our flag) + + + Check whether the input byte is zero; if so we have + a match + < [ + It was non zero we go to our flag and turn it back to zero and add in case of negative + >->>>++++ + [<++++[<++++[<++++[<+>-]>-]>-]>-] + <<<< + [-] + ] > [ - + ,+ [ - + And the whole story again + [>>+<+<-]>> + >>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<++ The number 0xBF + [<->-] + + + < [ + >->>>++++ + [<++++[<++++[<++++[<+>-]>-]>-]>-] + <<<< + [-] + + ] > [ - + Evil Bom Found: clear the inputs + <<[-]<<<[-]<<<[-] + >>>>>>>> + ] Third byte test failed! 
+ <<< + ] End of file at third byte + ] Second byte test failed + <<< + ] End of file at second byte + ] First byte test failed + <<< +] End of file at first byte +If we broke out of the loops somewhere that means that we have to print the inputs on +the tape; we do this by going from left to right and at the positions where the +input should be we print it if it is non zero +When the BOM is found the whole tape has been made blank and we do not print anything +>[.>]>>[.>]>>[.>] + +Print the remainder +,+[-.,+] diff --git a/correct/rbom0 b/correct/rbom0 new file mode 100644 index 0000000..e69de29 diff --git a/correct/rbom1 b/correct/rbom1 new file mode 100644 index 0000000..1c2493a --- /dev/null +++ b/correct/rbom1 @@ -0,0 +1 @@ +blieb \ No newline at end of file diff --git a/correct/rnobom0 b/correct/rnobom0 new file mode 100644 index 0000000..e69de29 diff --git a/correct/rnobom1 b/correct/rnobom1 new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/correct/rnobom1 @@ -0,0 +1 @@ +1 diff --git a/correct/rnobom2 b/correct/rnobom2 new file mode 100644 index 0000000..81c545e --- /dev/null +++ b/correct/rnobom2 @@ -0,0 +1 @@ +1234 diff --git a/nobom0 b/nobom0 new file mode 100644 index 0000000..e69de29 diff --git a/nobom1 b/nobom1 new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/nobom1 @@ -0,0 +1 @@ +1 diff --git a/nobom2 b/nobom2 new file mode 100644 index 0000000..81c545e --- /dev/null +++ b/nobom2 @@ -0,0 +1 @@ +1234 diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..516ec7f --- /dev/null +++ b/test.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +test -z "$BOM" && BOM='ocaml ./bomstrip.ocaml' +test -z "$BOMFILES" && BOMFILES='bom0 bom1 nobom0 nobom1 nobom2' + +res=0 +for f in $BOMFILES; do + $BOM < "$f" > "r$f" + if ! 
cmp "r$f" correct/"r$f"; then + echo "$f is wrong" + res=1 + else + rm -f "r$f" + fi +done +exit "$res" -- cgit v1.2.3 From a4b5392b978b9c66e8abe2e749fb54c2a5be0a62 Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Wed, 26 Dec 2018 01:33:57 +0200 Subject: Import bomstrip_9-12.debian.tar.xz [dgit import tarball bomstrip 9-12 bomstrip_9-12.debian.tar.xz] --- README.Debian | 15 +++++ bomstrip-files.sh | 19 ++++++ bomstrip.1 | 83 +++++++++++++++++++++++ bomstrip.dirs | 2 + bomstrip.install | 1 + bomstrip.links | 1 + bomstrip.manpages | 1 + changelog | 168 +++++++++++++++++++++++++++++++++++++++++++++++ clean | 1 + control | 20 ++++++ copyright | 13 ++++ patches/c-warnings.patch | 60 +++++++++++++++++ patches/series | 2 + patches/typos.patch | 34 ++++++++++ rules | 34 ++++++++++ source/format | 1 + tests/control | 5 ++ upstream/metadata | 4 ++ watch | 3 + 19 files changed, 467 insertions(+) create mode 100644 README.Debian create mode 100644 bomstrip-files.sh create mode 100644 bomstrip.1 create mode 100644 bomstrip.dirs create mode 100644 bomstrip.install create mode 100644 bomstrip.links create mode 100644 bomstrip.manpages create mode 100644 changelog create mode 100644 clean create mode 100644 control create mode 100644 copyright create mode 100644 patches/c-warnings.patch create mode 100644 patches/series create mode 100644 patches/typos.patch create mode 100755 rules create mode 100644 source/format create mode 100644 tests/control create mode 100644 upstream/metadata create mode 100644 watch diff --git a/README.Debian b/README.Debian new file mode 100644 index 0000000..1a94386 --- /dev/null +++ b/README.Debian @@ -0,0 +1,15 @@ +bomstrip for Debian +------------------- + +This package provides the compiled C implementation of bomstrip with +a couple of warning and reliability fixes. It also adds a manual page +and another command-line tool, bomstrip-files, for stripping files +in-place similarly to perl -i.bom or sed -i.bom. 
+ +The bomstrip distribution itself consists of implementations of +bomstrip filters in many languages. You may see those filters in +the source package, and you may find several fixes to some of them +on my bomstrip patchset webpage - +http://devel.ringlet.net/textproc/bomstrip/ + + -- Peter Pentchev Thu, 19 Jun 2008 14:49:22 +0300 diff --git a/bomstrip-files.sh b/bomstrip-files.sh new file mode 100644 index 0000000..b0353b9 --- /dev/null +++ b/bomstrip-files.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Written by Peter Pentchev in 2008. +# This file is hereby placed into the public domain. + +set -e + +[ -z "$BOMSTRIP" ] && BOMSTRIP='bomstrip' + +if [ "$#" -eq 0 ]; then + echo 'Usage: bomstrip-files file...' 1>&2 + exit 1 +fi + +while [ "$#" -ne 0 ]; do + cp "$1" "$1.bom" + $BOMSTRIP < "$1.bom" > "$1" + shift +done diff --git a/bomstrip.1 b/bomstrip.1 new file mode 100644 index 0000000..bf1a966 --- /dev/null +++ b/bomstrip.1 @@ -0,0 +1,83 @@ +.\" Written by Peter Pentchev in 2008. +.\" This file is hereby placed into the public domain. +.\" +.Dd June 14, 2008 +.Dt BOMSTRIP 1 +.Os +.Sh NAME +.Nm bomstrip , +.Nm bomstrip-files +.Nd strip the BOM sequence from UTF-8 files +.Sh SYNOPSIS +.Nm +.Nm bomstrip-files +.Ar +.Sh DESCRIPTION +The +.Nm +utility reads UTF-8 data from its standard input and copies it to +its standard output, stripping the BOM (byte-order mark) from +the beginning of the text if it is present. +There are no command-line options and no parameters. +.Pp +The +.Nm bomstrip-files +utility removes the UTF-8 BOM from the specified files, saving each +file's original contents with a +.Pa .bom +extension. +It uses the +.Nm +utility, trying to execute it as +.Dq Nm ; +if the +.Nm +utility is installed under another name, or if a more complex command +is desired, it may be supplied in the +.Ev BOMSTRIP +environment variable. 
+.Sh EXAMPLES +Strip the BOM, if present, from a text file: +.Pp +.Dl bomstrip < bom.txt > nobom.txt +.Pp +Strip the BOM, if present, from all text files, backing them up with a +.Pa .bom +extension: +.Pp +.Dl bomstrip-files *.txt +.Pp +Use the OCAML implementation of +.Nm : +.Pp +.Dl env BOMSTRIP='ocaml bomstrip.ocaml' bomstrip-files *.txt +.Sh SEE ALSO +The +.Nm +home page: +.Li https://www.ueber.net/who/mjl/projects/bomstrip/ +.Sh HISTORY +The +.Nm +utility (in many languages) was written by +.An Mechiel Lukkien , +with implementations in various languages sent to him by others, including +.An Andreas Gohr , +.An Andrew Gerrand , +.An Berteun Damman , +.An Matthijs Bomhoff , +.An Peter Pentchev , +and +.An Ruben Smelik . +The +.Nm bomstrip-files +utility and this manual page were written by +.An Peter Pentchev +in the hope that they reflect the behavior of all the +.Nm +implementations in all languages. +.Sh AUTHORS +.An Mechiel Lukkien +.Aq mechiel@xs4all.nl +.An Peter Pentchev +.Aq roam@ringlet.net diff --git a/bomstrip.dirs b/bomstrip.dirs new file mode 100644 index 0000000..98d1583 --- /dev/null +++ b/bomstrip.dirs @@ -0,0 +1,2 @@ +usr/bin +usr/share/man/man1 diff --git a/bomstrip.install b/bomstrip.install new file mode 100644 index 0000000..d8f5900 --- /dev/null +++ b/bomstrip.install @@ -0,0 +1 @@ +bomstrip bomstrip-files usr/bin diff --git a/bomstrip.links b/bomstrip.links new file mode 100644 index 0000000..26fef85 --- /dev/null +++ b/bomstrip.links @@ -0,0 +1 @@ +usr/share/man/man1/bomstrip.1 usr/share/man/man1/bomstrip-files.1 diff --git a/bomstrip.manpages b/bomstrip.manpages new file mode 100644 index 0000000..88658c7 --- /dev/null +++ b/bomstrip.manpages @@ -0,0 +1 @@ +debian/bomstrip.1 diff --git a/changelog b/changelog new file mode 100644 index 0000000..518d7b0 --- /dev/null +++ b/changelog @@ -0,0 +1,168 @@ +bomstrip (9-12) unstable; urgency=medium + + * Use my Debian e-mail address. 
+ * Declare compliance with Debian Policy 4.3.0 with no changes. + * Use the B-D: debhelper-compat (= 11) mechanism. + * Add a trivial autopkgtest running adequate on the installed package. + * Move away from git-dpm. + * Bump the debhelper compatibility level to 12 with no changes. + + -- Peter Pentchev Wed, 26 Dec 2018 01:33:57 +0200 + +bomstrip (9-11) unstable; urgency=medium + + * Declare compliance with Debian Policy 4.1.3 and drop the implied + "Testsuite: autopkgtest" source control field. + * Switch to git-dpm and rename the patches. + * Add "Rules-Requires-Root: no" to the source control stanza. + * Bump the debhelper compatibility level to 11 with no changes. + + -- Peter Pentchev Sun, 21 Jan 2018 01:38:21 +0200 + +bomstrip (9-10) unstable; urgency=medium + + * Build-depend on debhelper 10 now that it's even in jessie-backports; + remove the Lintian override. + * Correct the upstream site location in the debian/bomstrip.1 manual + page, too. + * Use the HTTPS scheme for various Debian and upstream URLs. + * Add the 02_typos patch to correct a typographical error. + + -- Peter Pentchev Mon, 09 Jan 2017 22:03:24 +0200 + +bomstrip (9-9) unstable; urgency=medium + + * Declare compliance with Debian Policy 3.9.8 with no changes. + * Drop the versioned dependency on dpkg-dev, it's satisfied everywhere. + * Update the watch file to version 4 and explicitly specify pgpmode=none. + * Add an autopkgtest suite. + * Bump the debhelper compatibility level to 10: + - override the Lintian debhelper version warning as it itself suggests + - let debhelper handle parallel building + + -- Peter Pentchev Thu, 21 Apr 2016 23:28:15 +0300 + +bomstrip (9-8) unstable; urgency=medium + + * Switch Vcs-Git and Vcs-Browser to my full-source GitLab repository. + * Declare compliance with version 3.9.6 of the Debian Policy, no changes. + * Get the build hardening flags directly from debhelper. + * Add an upstream metadata file. 
+ + -- Peter Pentchev Sat, 05 Sep 2015 17:23:16 +0300 + +bomstrip (9-7) unstable; urgency=low + + * Remove the DM-Upload-Allowed flag since Damyan Ivanov was kind + enough to grant me permission using the new dak mechanism. + * Bump Standards-Version to 3.9.5 with no changes. + * Update the copyright file to the copyright-format/1.0 format. + * Bump the debhelper compatibility version to 9 with no changes. + * Get the hardening options directly from dpkg-buildflags: + - bump the build dependency on dpkg-dev to 1.16.1~ + - remove the build dependency on hardening-includes + - no longer include the hardening Makefile snippet into the rules file + - explicitly enable all the hardening features; they may be disabled + in the future if bomstrip should fail to build anywhere + * Enable parallel building - not that it matters a lot in this case :) + * Use DEB_CFLAGS_MAINT_APPEND to append the warning compiler flags. + * Add _POSIX_C_SOURCE and _XOPEN_SOURCE specifications, just in case. + * Drop the explicit compression specification for the Debian tarball; + it really does not matter for this package. + + -- Peter Pentchev Sun, 17 Nov 2013 14:52:55 +0200 + +bomstrip (9-6) unstable; urgency=low + + * Update the copyright file to the latest DEP 5 candidate format. + * Bump Standards-Version to 3.9.2 with no changes. + * Update the copyright file to the latest DEP 5 candidate format and + fix the DEP 5 URL after the Alioth migration. + * Specify Multi-Arch: foreign for the binary package, just in case. + + -- Peter Pentchev Thu, 14 Jul 2011 19:04:00 +0300 + +bomstrip (9-5) unstable; urgency=low + + * The upstream site has moved, update the Homepage field, the watch file + and the copyright file. + * Update the copyright file to rev. 166 of the DEP 5 candidate. + * Upload to unstable. + + -- Peter Pentchev Tue, 08 Feb 2011 16:14:12 +0200 + +bomstrip (9-4) experimental; urgency=low + + * Switch to Git and point the Vcs-* fields to Gitorious. 
+ * We haven't been using dpatch for more than two years, so drop + the executable permissions on the patch file. + * Bump Standards-Version to 3.9.1 with no changes. + * Switch to bzip2 compression for the Debian tarball. + * Depend on a hardening package so that the build hardening is actually + done in the automated builds, and use the hardening-includes package + instead of hardening-wrapper so that the hardening flags are visible in + CFLAGS and LDFLAGS. + * Bump the debhelper compatibility level to 8 with no changes. + + -- Peter Pentchev Fri, 24 Dec 2010 15:02:51 +0200 + +bomstrip (9-3) unstable; urgency=low + + * Bump Standards-Version to 3.9.0: + - honor the "nocheck" build option and do not run dh_auto_test anyway + * Simplify the rules file: + - simplify the DEB_BUILD_HARDENING logic + - move various dh_* command parameters out into separate debian/* files + - use debhelper override rules + * Fix a rules file bug: use LDFLAGS, not CFLAGS, to link a C program. + * Use dpkg-buildflags from dpkg-dev 1.15.7 to obtain default values for + CFLAGS, CPPFLAGS, and LDFLAGS. + * Convert to the 3.0 (quilt) source format. + * Convert the copyright file to the latest DEP 5 format. + * Convert the single patch's header to the DEP 3 format. + * Shorten the Vcs-Browser URL and actually point it at the Debian package. + * Honor CPPFLAGS. + * Add DM-Upload-Allowed with Damyan Ivanov's permission. + + -- Peter Pentchev Wed, 30 Jun 2010 08:35:48 +0300 + +bomstrip (9-2) unstable; urgency=low + + * Fix the short description. Closes: #489795 + * Switch to quilt as Damyan Ivanov (dmn) suggested. 
+ * Fix some of the checks in my "C warnings fix" patch + * Add two checks reported by the Debian hardening wrapper + * Enable build hardening unless DEB_BUILD_OPTIONS contains "nohardening" + * Use debhelper 7's features to minimize the rules file + * Spell "Public Domain" as "PD", not "other", in the copyright file + + * debian/README.source + - describe the usage of quilt, not dpatch + * debian/changelog + - use the "PD" name for the public domain license + * debian/control + - fix the short description. Closes: #489795 + - build-depend on quilt, at least 0.40 for quilt.make + * debian/patches/series + - renamed from dpatch's 00list + * debian/patches/01_c_warnings.patch + - renamed from dpatch's 01_c_warnings.dpatch + - refresh the patch with -p ab --no-index --no-timestamps + - rewrite the header, dropping the dpatch-style comments + - check the fwrite() return code for errors, found by the Debian + hardening wrapper + - restore the size_t type of nread and fix the signedness warning + the right way - fread() may never return a negative value, but + if it should return zero, check for ferror() + * debian/rules + - use quilt.make and ${QUILT_STAMPFN} instead of dpatch's ones + - enable build hardening unless DEB_BUILD_OPTIONS contains "nohardening" + - use debhelper 7's "dh" wrapper to do most of the work + + -- Peter Pentchev Wed, 09 Jul 2008 13:40:31 +0300 + +bomstrip (9-1) unstable; urgency=low + + * Initial release.
Closes: #486425 + + -- Peter Pentchev Fri, 27 Jun 2008 12:38:16 +0300 diff --git a/clean b/clean new file mode 100644 index 0000000..628455e --- /dev/null +++ b/clean @@ -0,0 +1 @@ +bomstrip.o bomstrip bomstrip-files diff --git a/control b/control new file mode 100644 index 0000000..8937cce --- /dev/null +++ b/control @@ -0,0 +1,20 @@ +Source: bomstrip +Section: text +Priority: optional +Maintainer: Peter Pentchev +Build-Depends: debhelper-compat (= 12) +Standards-Version: 4.3.0 +Homepage: https://www.ueber.net/who/mjl/projects/bomstrip/ +Vcs-Git: https://gitlab.com/ppentchev/bomstrip-pkg-debian.git +Vcs-Browser: https://gitlab.com/ppentchev/bomstrip-pkg-debian +Rules-Requires-Root: no + +Package: bomstrip +Architecture: any +Multi-Arch: foreign +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: tool to strip Byte-Order Marks from UTF-8 text files + Bomstrip is a very simple tool that removes BOM's (byte-order-marks) + from UTF-8 files. UTF-8 does not have byte-ordering issues, so there + is absolutely no need to have three bytes (the UTF-8-BOM) that do not + say anything about the byte-order (since there is nothing to say). diff --git a/copyright b/copyright new file mode 100644 index 0000000..358d81e --- /dev/null +++ b/copyright @@ -0,0 +1,13 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: bomstrip +Upstream-Contact: Mechiel Lukkien +Source: https://www.ueber.net/who/mjl/projects/bomstrip/ +License: public-domain + +Files: * +Copyright: All files are in the public domain +License: public-domain + +License: public-domain + The bomstrip utility, all implementations, and all related files, + including the Debian package files, are in the public domain. diff --git a/patches/c-warnings.patch b/patches/c-warnings.patch new file mode 100644 index 0000000..390430b --- /dev/null +++ b/patches/c-warnings.patch @@ -0,0 +1,60 @@ +Description: Make this program really work and fix some compiler warnings. 
+ - make buf[] and utf8bom[] global to make sure they're zero-initialized + and also to save stack space + - use a sane size for buf[] + - replace the fread() < 0 check with one for ferror() + - check the return code of fwrite(), too +Forwarded: yes +Author: Peter Pentchev +Last-Update: 2010-06-16 + +--- a/bomstrip.c ++++ b/bomstrip.c +@@ -3,19 +3,22 @@ + #include + #include + +-void +-usage(char *prog) ++char buf[BUFSIZ]; ++const char *utf8bom = "\xef\xbb\xbf"; ++ ++static void usage(const char *); ++ ++static void ++usage(const char *prog) + { + fprintf(stderr, "usage: %s\n", prog); + exit(1); + } + + int +-main(int argc, char *argv[]) ++main(int argc, const char * const argv[]) + { + size_t nread; +- char buf[65536]; +- char *utf8bom = "\xef\xbb\xbf"; + + if (argc > 1) + usage(argv[0]); +@@ -24,14 +27,14 @@ + if (nread == 0) + return 0; + if (strcmp(buf, utf8bom) != 0) +- fwrite(buf, 1, nread, stdout); ++ if (fwrite(buf, 1, nread, stdout) < nread) ++ exit(1); + for (;;) { + nread = fread(buf, 1, sizeof buf, stdin); +- if (nread < 0) +- exit(1); + if (nread == 0) +- return 0; +- fwrite(buf, 1, nread, stdout); ++ return ferror(stdin) && 1; ++ if (fwrite(buf, 1, nread, stdout) < nread) ++ exit(1); + } + return 0; + } diff --git a/patches/series b/patches/series new file mode 100644 index 0000000..758e270 --- /dev/null +++ b/patches/series @@ -0,0 +1,2 @@ +c-warnings.patch +typos.patch diff --git a/patches/typos.patch b/patches/typos.patch new file mode 100644 index 0000000..305a9be --- /dev/null +++ b/patches/typos.patch @@ -0,0 +1,34 @@ +Description: Correct a typographical error. 
+Forwarded: no +Author: Peter Pentchev +Last-Update: 2017-01-09 + +--- a/bomstrip_expl.bf ++++ b/bomstrip_expl.bf +@@ -10,10 +10,10 @@ + [>>+<+<-]>> + And we put our constant after it; as the number 239 itself + is a prime we use 240 which factorizes nicely as 2^4 * 3 * 5 +- and substract one from it ++ and subtract one from it + >>>>+++++[<++++>-]<[<++++>-]<[<+++>-]<- + +- While our constant is not zero we substract one from it and also ++ While our constant is not zero we subtract one from it and also + from the input byte + [<->-] + +@@ -38,11 +38,11 @@ + [>>+<+<-]>> + + And we put our constant again next to it; +- We first calculate 189; which is 3^3 * 7 and substract 2 ++ We first calculate 189; which is 3^3 * 7 and subtract 2 + >>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<-- The number 0xBB + + +- While our constant is not zero we substract one from it and also ++ While our constant is not zero we subtract one from it and also + from the input byte + + [<->-] diff --git a/rules b/rules new file mode 100755 index 0000000..0d1100c --- /dev/null +++ b/rules @@ -0,0 +1,34 @@ +#!/usr/bin/make -f +# +# Debhelper rules for bomstrip, the UTF-8 BOM strip utility +# Written by Peter Pentchev in 2008. +# This file is hereby placed into the public domain. + +# Aim for the top, adapt if anything should break on the buildds. 
+DEB_BUILD_MAINT_OPTIONS= hardening=+all +export DEB_BUILD_MAINT_OPTIONS + +DEB_CPPFLAGS_MAINT_APPEND= -D_POSIX_C_SOURCE=200809L -D_XOPEN_SOURCE=700 +DEB_CFLAGS_MAINT_APPEND= -pipe -Wall -W -ansi -pedantic -Wbad-function-cast \ + -Wcast-align -Wcast-qual -Wchar-subscripts -Winline \ + -Wmissing-prototypes -Wnested-externs -Wpointer-arith \ + -Wredundant-decls -Wshadow -Wstrict-prototypes -Wwrite-strings +ifneq (,$(filter werror,$(DEB_BUILD_OPTIONS))) + DEB_CFLAGS_MAINT_APPEND+= -Werror +endif +export DEB_CFLAGS_MAINT_APPEND DEB_CPPFLAGS_MAINT_APPEND + +CC?= cc + +override_dh_auto_build: + ${CC} -c ${CPPFLAGS} ${CFLAGS} bomstrip.c + ${CC} ${LDFLAGS} -o bomstrip bomstrip.o + sed -e "s@'bomstrip'@/usr/bin/bomstrip@" < debian/bomstrip-files.sh > bomstrip-files + +override_dh_auto_test: +ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) + env BOM=./bomstrip sh test.sh +endif + +%: + dh $@ diff --git a/source/format b/source/format new file mode 100644 index 0000000..163aaf8 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/tests/control b/tests/control new file mode 100644 index 0000000..1f7dac7 --- /dev/null +++ b/tests/control @@ -0,0 +1,5 @@ +Test-Command: env BOM=/usr/bin/bomstrip sh test.sh +Depends: @ + +Test-Command: adequate bomstrip +Depends: @, adequate diff --git a/upstream/metadata b/upstream/metadata new file mode 100644 index 0000000..359774f --- /dev/null +++ b/upstream/metadata @@ -0,0 +1,4 @@ +Name: bomstrip +Contact: Mechiel Lukkien +Changelog: https://www.ueber.net/who/mjl/projects/bomstrip/ +FAQ: https://www.ueber.net/who/mjl/projects/bomstrip/ diff --git a/watch b/watch new file mode 100644 index 0000000..2a9bb58 --- /dev/null +++ b/watch @@ -0,0 +1,3 @@ +version=4 +opts=pgpmode=none \ +https://www.ueber.net/who/mjl/projects/bomstrip/ files/bomstrip-(.*)\.tgz -- cgit v1.2.3 From 689cd5c1f10c12960b78b31d7f2b6934e9158d29 Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Wed, 26 Dec 2018 01:33:57 +0200 Subject: Make this program 
really work and fix some compiler warnings. Forwarded: yes Last-Update: 2010-06-16 - make buf[] and utf8bom[] global to make sure they're zero-initialized and also to save stack space - use a sane size for buf[] - replace the fread() < 0 check with one for ferror() - check the return code of fwrite(), too Gbp-Pq: Name c-warnings.patch --- bomstrip.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/bomstrip.c b/bomstrip.c index 6953414..a5a831a 100755 --- a/bomstrip.c +++ b/bomstrip.c @@ -3,19 +3,22 @@ #include #include -void -usage(char *prog) +char buf[BUFSIZ]; +const char *utf8bom = "\xef\xbb\xbf"; + +static void usage(const char *); + +static void +usage(const char *prog) { fprintf(stderr, "usage: %s\n", prog); exit(1); } int -main(int argc, char *argv[]) +main(int argc, const char * const argv[]) { size_t nread; - char buf[65536]; - char *utf8bom = "\xef\xbb\xbf"; if (argc > 1) usage(argv[0]); @@ -24,14 +27,14 @@ main(int argc, char *argv[]) if (nread == 0) return 0; if (strcmp(buf, utf8bom) != 0) - fwrite(buf, 1, nread, stdout); + if (fwrite(buf, 1, nread, stdout) < nread) + exit(1); for (;;) { nread = fread(buf, 1, sizeof buf, stdin); - if (nread < 0) - exit(1); if (nread == 0) - return 0; - fwrite(buf, 1, nread, stdout); + return ferror(stdin) && 1; + if (fwrite(buf, 1, nread, stdout) < nread) + exit(1); } return 0; } -- cgit v1.2.3 From 73fab346fbba78892600484c35b7e7146993192b Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Wed, 26 Dec 2018 01:33:57 +0200 Subject: Correct a typographical error. 
Forwarded: no Last-Update: 2017-01-09 Gbp-Pq: Name typos.patch --- bomstrip_expl.bf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bomstrip_expl.bf b/bomstrip_expl.bf index 2677922..1bf132f 100755 --- a/bomstrip_expl.bf +++ b/bomstrip_expl.bf @@ -10,10 +10,10 @@ Read a byte of input; if it's minus 1 we treat it as an EOF [>>+<+<-]>> And we put our constant after it; as the number 239 itself is a prime we use 240 which factorizes nicely as 2^4 * 3 * 5 - and substract one from it + and subtract one from it >>>>+++++[<++++>-]<[<++++>-]<[<+++>-]<- - While our constant is not zero we substract one from it and also + While our constant is not zero we subtract one from it and also from the input byte [<->-] @@ -38,11 +38,11 @@ Read a byte of input; if it's minus 1 we treat it as an EOF [>>+<+<-]>> And we put our constant again next to it; - We first calculate 189; which is 3^3 * 7 and substract 2 + We first calculate 189; which is 3^3 * 7 and subtract 2 >>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<-- The number 0xBB - While our constant is not zero we substract one from it and also + While our constant is not zero we subtract one from it and also from the input byte [<->-] -- cgit v1.2.3 From 3cdb6428817102c9f1b9f6d4d1c171ea65985d06 Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Tue, 25 Feb 2020 17:41:03 +0200 Subject: Make this program really work and fix some compiler warnings. 
Forwarded: yes Last-Update: 2010-06-16 - make buf[] and utf8bom[] global to make sure they're zero-initialized and also to save stack space - use a sane size for buf[] - replace the fread() < 0 check with one for ferror() - check the return code of fwrite(), too Gbp-Pq: Name c-warnings.patch --- bomstrip.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/bomstrip.c b/bomstrip.c index 6953414..a5a831a 100755 --- a/bomstrip.c +++ b/bomstrip.c @@ -3,19 +3,22 @@ #include #include -void -usage(char *prog) +char buf[BUFSIZ]; +const char *utf8bom = "\xef\xbb\xbf"; + +static void usage(const char *); + +static void +usage(const char *prog) { fprintf(stderr, "usage: %s\n", prog); exit(1); } int -main(int argc, char *argv[]) +main(int argc, const char * const argv[]) { size_t nread; - char buf[65536]; - char *utf8bom = "\xef\xbb\xbf"; if (argc > 1) usage(argv[0]); @@ -24,14 +27,14 @@ main(int argc, char *argv[]) if (nread == 0) return 0; if (strcmp(buf, utf8bom) != 0) - fwrite(buf, 1, nread, stdout); + if (fwrite(buf, 1, nread, stdout) < nread) + exit(1); for (;;) { nread = fread(buf, 1, sizeof buf, stdin); - if (nread < 0) - exit(1); if (nread == 0) - return 0; - fwrite(buf, 1, nread, stdout); + return ferror(stdin) && 1; + if (fwrite(buf, 1, nread, stdout) < nread) + exit(1); } return 0; } -- cgit v1.2.3 From e348e7fdefa7ba2f5bd7c5d744826ade8bb8699c Mon Sep 17 00:00:00 2001 From: Peter Pentchev Date: Tue, 25 Feb 2020 17:41:03 +0200 Subject: Correct a typographical error. 
Forwarded: no Last-Update: 2017-01-09 Gbp-Pq: Name typos.patch --- bomstrip_expl.bf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bomstrip_expl.bf b/bomstrip_expl.bf index 2677922..1bf132f 100755 --- a/bomstrip_expl.bf +++ b/bomstrip_expl.bf @@ -10,10 +10,10 @@ Read a byte of input; if it's minus 1 we treat it as an EOF [>>+<+<-]>> And we put our constant after it; as the number 239 itself is a prime we use 240 which factorizes nicely as 2^4 * 3 * 5 - and substract one from it + and subtract one from it >>>>+++++[<++++>-]<[<++++>-]<[<+++>-]<- - While our constant is not zero we substract one from it and also + While our constant is not zero we subtract one from it and also from the input byte [<->-] @@ -38,11 +38,11 @@ Read a byte of input; if it's minus 1 we treat it as an EOF [>>+<+<-]>> And we put our constant again next to it; - We first calculate 189; which is 3^3 * 7 and substract 2 + We first calculate 189; which is 3^3 * 7 and subtract 2 >>>>+++++++[<+++>-]<[<+++>-]<[<+++>-]<-- The number 0xBB - While our constant is not zero we substract one from it and also + While our constant is not zero we subtract one from it and also from the input byte [<->-] -- cgit v1.2.3