diff options
author | Clint Adams <clint@debian.org> | 2018-12-22 09:24:04 -0500 |
---|---|---|
committer | Clint Adams <clint@debian.org> | 2018-12-22 09:24:04 -0500 |
commit | 678b6305a7d074da7cfb48fccdf7261042a4336d (patch) | |
tree | c0d05d9f8978af415c5cea9ec39fc85b6bfb6cec /sed |
Import sed_4.7.orig.tar.xz
[dgit import orig sed_4.7.orig.tar.xz]
Diffstat (limited to 'sed')
-rw-r--r-- | sed/compile.c | 1689 | ||||
-rw-r--r-- | sed/debug.c | 456 | ||||
-rw-r--r-- | sed/execute.c | 1705 | ||||
-rw-r--r-- | sed/local.mk | 58 | ||||
-rw-r--r-- | sed/mbcs.c | 76 | ||||
-rw-r--r-- | sed/regexp.c | 379 | ||||
-rw-r--r-- | sed/sed.c | 388 | ||||
-rw-r--r-- | sed/sed.h | 293 | ||||
-rw-r--r-- | sed/utils.c | 499 | ||||
-rw-r--r-- | sed/utils.h | 52 |
10 files changed, 5595 insertions, 0 deletions
diff --git a/sed/compile.c b/sed/compile.c new file mode 100644 index 0000000..ca58371 --- /dev/null +++ b/sed/compile.c @@ -0,0 +1,1689 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +/* compile.c: translate sed source into internal form */ + +#include "sed.h" +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <obstack.h> +#include "progname.h" +#include "xalloc.h" + +#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/ +#define VECTOR_ALLOC_INCREMENT 40 + +/* let's not confuse text editors that have only dumb bracket-matching... */ +#define OPEN_BRACKET '[' +#define CLOSE_BRACKET ']' +/* #define OPEN_BRACE '{' */ +#define CLOSE_BRACE '}' + +struct prog_info { + /* When we're reading a script command from a string, `prog.base' + points to the first character in the string, 'prog.cur' points + to the current character in the string, and 'prog.end' points + to the end of the string. This allows us to compile script + strings that contain nulls. */ + const unsigned char *base; + const unsigned char *cur; + const unsigned char *end; + + /* This is the current script file. If it is NULL, we are reading + from a string stored at `prog.cur' instead. If both `prog.file' + and `prog.cur' are NULL, we're in trouble! 
*/ + FILE *file; +}; + +/* Information used to give out useful and informative error messages. */ +struct error_info { + /* This is the name of the current script file. */ + const char *name; + + /* This is the number of the current script line that we're compiling. */ + countT line; + + /* This is the index of the "-e" expressions on the command line. */ + countT string_expr_count; +}; + + +/* Label structure used to resolve GOTO's, labels, and block beginnings. */ +struct sed_label { + countT v_index; /* index of vector element being referenced */ + char *name; /* NUL-terminated name of the label */ + struct error_info err_info; /* track where `{}' blocks start */ + struct sed_label *next; /* linked list (stack) */ +}; + +struct special_files { + struct output outf; + FILE **pfp; +}; + +static FILE *my_stdin, *my_stdout, *my_stderr; +static struct special_files special_files[] = { + { { (char *) "/dev/stdin", false, NULL, NULL }, &my_stdin }, + { { (char *) "/dev/stdout", false, NULL, NULL }, &my_stdout }, + { { (char *) "/dev/stderr", false, NULL, NULL }, &my_stderr }, + { { NULL, false, NULL, NULL }, NULL } +}; + +/* Where we are in the processing of the input. */ +static struct prog_info prog; +static struct error_info cur_input; + +/* Information about labels and jumps-to-labels. This is used to do + the required backpatching after we have compiled all the scripts. */ +static struct sed_label *jumps = NULL; +static struct sed_label *labels = NULL; + +/* We wish to detect #n magic only in the first input argument; + this flag tracks when we have consumed the first file of input. */ +static bool first_script = true; + +/* Allow for scripts like "sed -e 'i\' -e foo": */ +static struct buffer *pending_text = NULL; +static struct text_buf *old_text_buf = NULL; + +/* Information about block start positions. This is used to backpatch + block end positions. */ +static struct sed_label *blocks = NULL; + +/* Use an obstack for compilation. 
*/ +static struct obstack obs; + +/* Various error messages we may want to print */ +static const char errors[] = + "multiple `!'s\0" + "unexpected `,'\0" + "invalid usage of +N or ~N as first address\0" + "unmatched `{'\0" + "unexpected `}'\0" + "extra characters after command\0" + "expected \\ after `a', `c' or `i'\0" + "`}' doesn't want any addresses\0" + ": doesn't want any addresses\0" + "comments don't accept any addresses\0" + "missing command\0" + "command only uses one address\0" + "unterminated address regex\0" + "unterminated `s' command\0" + "unterminated `y' command\0" + "unknown option to `s'\0" + "multiple `p' options to `s' command\0" + "multiple `g' options to `s' command\0" + "multiple number options to `s' command\0" + "number option to `s' command may not be zero\0" + "strings for `y' command are different lengths\0" + "delimiter character is not a single-byte character\0" + "expected newer version of sed\0" + "invalid usage of line address 0\0" + "unknown command: `%c'\0" + "incomplete command\0" + "\":\" lacks a label\0" + "recursive escaping after \\c not allowed\0" + "e/r/w commands disabled in sandbox mode\0" + "missing filename in r/R/w/W commands"; + +#define BAD_BANG (errors) +#define BAD_COMMA (BAD_BANG + sizeof (N_("multiple `!'s"))) +#define BAD_STEP (BAD_COMMA + sizeof (N_("unexpected `,'"))) +#define EXCESS_OPEN_BRACE (BAD_STEP \ + + sizeof (N_("invalid usage of +N or ~N as first address"))) +#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof (N_("unmatched `{'"))) +#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof (N_("unexpected `}'"))) +#define EXPECTED_SLASH (EXCESS_JUNK \ + + sizeof (N_("extra characters after command"))) +#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH \ + + sizeof (N_("expected \\ after `a', `c' or `i'"))) +#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR \ + + sizeof (N_("`}' doesn't want any addresses"))) +#define NO_SHARP_ADDR (NO_COLON_ADDR \ + + sizeof (N_(": doesn't want any addresses"))) +#define NO_COMMAND 
(NO_SHARP_ADDR \ + + sizeof (N_("comments don't accept any addresses"))) +#define ONE_ADDR (NO_COMMAND + sizeof (N_("missing command"))) +#define UNTERM_ADDR_RE (ONE_ADDR + sizeof (N_("command only uses one address"))) +#define UNTERM_S_CMD (UNTERM_ADDR_RE \ + + sizeof (N_("unterminated address regex"))) +#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof (N_("unterminated `s' command"))) +#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof (N_("unterminated `y' command"))) +#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof (N_("unknown option to `s'"))) +#define EXCESS_G_OPT (EXCESS_P_OPT \ + + sizeof (N_("multiple `p' options to `s' command"))) +#define EXCESS_N_OPT (EXCESS_G_OPT \ + + sizeof (N_("multiple `g' options to `s' command"))) +#define ZERO_N_OPT (EXCESS_N_OPT \ + + sizeof (N_("multiple number options to `s' command"))) +#define Y_CMD_LEN (ZERO_N_OPT \ + + sizeof (N_("number option to `s' command may not be zero"))) +#define BAD_DELIM (Y_CMD_LEN \ + + sizeof (N_("strings for `y' command are different lengths"))) +#define ANCIENT_VERSION (BAD_DELIM \ + + sizeof (N_("delimiter character is not a single-byte character"))) +#define INVALID_LINE_0 (ANCIENT_VERSION \ + + sizeof (N_("expected newer version of sed"))) +#define UNKNOWN_CMD (INVALID_LINE_0 \ + + sizeof (N_("invalid usage of line address 0"))) +#define INCOMPLETE_CMD (UNKNOWN_CMD + sizeof (N_("unknown command: `%c'"))) +#define COLON_LACKS_LABEL (INCOMPLETE_CMD \ + + sizeof (N_("incomplete command"))) +#define RECURSIVE_ESCAPE_C (COLON_LACKS_LABEL \ + + sizeof (N_("\":\" lacks a label"))) +#define DISALLOWED_CMD (RECURSIVE_ESCAPE_C \ + + sizeof (N_("recursive escaping after \\c not allowed"))) +#define MISSING_FILENAME (DISALLOWED_CMD \ + + sizeof (N_( "e/r/w commands disabled in sandbox mode"))) +/* #define END_ERRORS (DISALLOWED_CMD \ + + sizeof (N_( "e/r/w commands disabled in sandbox mode"))) */ + +static struct output *file_read = NULL; +static struct output *file_write = NULL; + +/* Complain about an unknown 
command and exit. */ +static void +bad_command (char ch) +{ + const char *msg = _(UNKNOWN_CMD); + char *unknown_cmd = xmalloc (strlen (msg)); + sprintf (unknown_cmd, msg, ch); + bad_prog (unknown_cmd); +} + +/* Complain about a programming error and exit. */ +void +bad_prog (const char *why) +{ + if (cur_input.name) + fprintf (stderr, _("%s: file %s line %lu: %s\n"), program_name, + cur_input.name, (unsigned long)cur_input.line, why); + else + fprintf (stderr, _("%s: -e expression #%lu, char %lu: %s\n"), + program_name, + (unsigned long)cur_input.string_expr_count, + (unsigned long)(prog.cur-prog.base), + why); + exit (EXIT_BAD_USAGE); +} + +/* Read the next character from the program. Return EOF if there isn't + anything to read. Keep cur_input.line up to date, so error messages + can be meaningful. */ +static int +inchar (void) +{ + int ch = EOF; + + if (prog.cur) + { + if (prog.cur < prog.end) + ch = *prog.cur++; + } + else if (prog.file) + { + if (!feof (prog.file)) + ch = getc (prog.file); + } + if (ch == '\n') + ++cur_input.line; + return ch; +} + +/* unget `ch' so the next call to inchar will return it. */ +static void +savchar (int ch) +{ + if (ch == EOF) + return; + if (ch == '\n' && cur_input.line > 0) + --cur_input.line; + if (prog.cur) + { + if (prog.cur <= prog.base || *--prog.cur != ch) + panic ("Called savchar with unexpected pushback (%x)", + (unsigned int) ch); + } + else + ungetc (ch, prog.file); +} + +/* Read the next non-blank character from the program. */ +static int +in_nonblank (void) +{ + int ch; + do + ch = inchar (); + while (ISBLANK (ch)); + return ch; +} + +/* Consume script input until a valid end of command marker is found: + comment, closing brace, newline, semicolon or EOF. + If any other character is found, die with 'extra characters after command' + error. 
+*/ +static void +read_end_of_cmd (void) +{ + const int ch = in_nonblank (); + if (ch == CLOSE_BRACE || ch == '#') + savchar (ch); + else if (ch != EOF && ch != '\n' && ch != ';') + bad_prog (_(EXCESS_JUNK)); +} + +/* Read an integer value from the program. */ +static countT +in_integer (int ch) +{ + countT num = 0; + + while (ISDIGIT (ch)) + { + num = num * 10 + ch - '0'; + ch = inchar (); + } + savchar (ch); + return num; +} + +static int +add_then_next (struct buffer *b, int ch) +{ + add1_buffer (b, ch); + return inchar (); +} + +static char * +convert_number (char *result, char *buf, const char *bufend, int base) +{ + int n = 0; + int max = 1; + char *p; + + for (p=buf+1; p < bufend && max <= 255; ++p, max *= base) + { + int d = -1; + switch (*p) + { + case '0': d = 0x0; break; + case '1': d = 0x1; break; + case '2': d = 0x2; break; + case '3': d = 0x3; break; + case '4': d = 0x4; break; + case '5': d = 0x5; break; + case '6': d = 0x6; break; + case '7': d = 0x7; break; + case '8': d = 0x8; break; + case '9': d = 0x9; break; + case 'A': case 'a': d = 0xa; break; + case 'B': case 'b': d = 0xb; break; + case 'C': case 'c': d = 0xc; break; + case 'D': case 'd': d = 0xd; break; + case 'E': case 'e': d = 0xe; break; + case 'F': case 'f': d = 0xf; break; + } + if (d < 0 || base <= d) + break; + n = n * base + d; + } + if (p == buf+1) + *result = *buf; + else + *result = n; + return p; +} + +/* Read in a filename for a `r', `w', or `s///w' command. 
*/ +static struct buffer * +read_filename (void) +{ + struct buffer *b; + int ch; + + if (sandbox) + bad_prog (_(DISALLOWED_CMD)); + + b = init_buffer (); + ch = in_nonblank (); + while (ch != EOF && ch != '\n') + { +#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/ + if (posixicity == POSIXLY_EXTENDED) + if (ch == ';' || ch == '#') + { + savchar (ch); + break; + } +#endif + ch = add_then_next (b, ch); + } + add1_buffer (b, '\0'); + return b; +} + +static struct output * +get_openfile (struct output **file_ptrs, const char *mode, int fail) +{ + struct buffer *b; + char *file_name; + struct output *p; + + b = read_filename (); + file_name = get_buffer (b); + if (strlen (file_name) == 0) + bad_prog (_(MISSING_FILENAME)); + + for (p=*file_ptrs; p; p=p->link) + if (strcmp (p->name, file_name) == 0) + break; + + if (posixicity == POSIXLY_EXTENDED) + { + /* Check whether it is a special file (stdin, stdout or stderr) */ + struct special_files *special = special_files; + + /* std* sometimes are not constants, so they + cannot be used in the initializer for special_files */ + my_stdin = stdin; my_stdout = stdout; my_stderr = stderr; + for (special = special_files; special->outf.name; special++) + if (strcmp (special->outf.name, file_name) == 0) + { + special->outf.fp = *special->pfp; + free_buffer (b); + return &special->outf; + } + } + + if (!p) + { + p = OB_MALLOC (&obs, 1, struct output); + p->name = xstrdup (file_name); + p->fp = ck_fopen (p->name, mode, fail); + p->missing_newline = false; + p->link = *file_ptrs; + *file_ptrs = p; + } + free_buffer (b); + return p; +} + +static struct sed_cmd * +next_cmd_entry (struct vector **vectorp) +{ + struct sed_cmd *cmd; + struct vector *v; + + v = *vectorp; + if (v->v_length == v->v_allocated) + { + v->v_allocated += VECTOR_ALLOC_INCREMENT; + v->v = REALLOC (v->v, v->v_allocated, struct sed_cmd); + } + + cmd = v->v + v->v_length; + cmd->a1 = NULL; + cmd->a2 = NULL; + cmd->range_state = RANGE_INACTIVE; + 
cmd->addr_bang = false; + cmd->cmd = '\0'; /* something invalid, to catch bugs early */ + + *vectorp = v; + return cmd; +} + +static int +snarf_char_class (struct buffer *b, mbstate_t *cur_stat) +{ + int ch; + int state = 0; + int delim IF_LINT ( = 0) ; + + ch = inchar (); + if (ch == '^') + ch = add_then_next (b, ch); + if (ch == CLOSE_BRACKET) + ch = add_then_next (b, ch); + + /* States are: + 0 outside a collation element, character class or collation class + 1 after the bracket + 2 after the opening ./:/= + 3 after the closing ./:/= */ + + for (;; ch = add_then_next (b, ch)) + { + const int mb_char = IS_MB_CHAR (ch, cur_stat); + + switch (ch) + { + case EOF: + case '\n': + return ch; + + case '.': + case ':': + case '=': + if (mb_char) + continue; + + if (state == 1) + { + delim = ch; + state = 2; + } + else if (state == 2 && ch == delim) + state = 3; + else + break; + + continue; + + case OPEN_BRACKET: + if (mb_char) + continue; + + if (state == 0) + state = 1; + continue; + + case CLOSE_BRACKET: + if (mb_char) + continue; + + if (state == 0 || state == 1) + return ch; + else if (state == 3) + state = 0; + + break; + + default: + break; + } + + /* Getting a character different from .=: whilst in state 1 + goes back to state 0, getting a character different from ] + whilst in state 3 goes back to state 2. */ + state &= ~1; + } +} + +static struct buffer * +match_slash (int slash, int regex) +{ + struct buffer *b; + int ch; + mbstate_t cur_stat = { 0, }; + + /* We allow only 1 byte characters for a slash. 
*/ + if (IS_MB_CHAR (slash, &cur_stat)) + bad_prog (BAD_DELIM); + + memset (&cur_stat, 0, sizeof cur_stat); + + b = init_buffer (); + while ((ch = inchar ()) != EOF && ch != '\n') + { + const int mb_char = IS_MB_CHAR (ch, &cur_stat); + + if (!mb_char) + { + if (ch == slash) + return b; + else if (ch == '\\') + { + ch = inchar (); + if (ch == EOF) + break; + else if (ch == 'n' && regex) + ch = '\n'; + else if (ch != '\n' && (ch != slash || (!regex && ch == '&'))) + add1_buffer (b, '\\'); + } + else if (ch == OPEN_BRACKET && regex) + { + add1_buffer (b, ch); + ch = snarf_char_class (b, &cur_stat); + if (ch != CLOSE_BRACKET) + break; + } + } + + add1_buffer (b, ch); + } + + if (ch == '\n') + savchar (ch); /* for proper line number in error report */ + free_buffer (b); + return NULL; +} + +static int +mark_subst_opts (struct subst *cmd) +{ + int flags = 0; + int ch; + + cmd->global = false; + cmd->print = false; + cmd->eval = false; + cmd->numb = 0; + cmd->outf = NULL; + + for (;;) + switch ( (ch = in_nonblank ()) ) + { + case 'i': /* GNU extension */ + case 'I': /* GNU extension */ + if (posixicity == POSIXLY_BASIC) + bad_prog (_(UNKNOWN_S_OPT)); + flags |= REG_ICASE; + break; + + case 'm': /* GNU extension */ + case 'M': /* GNU extension */ + if (posixicity == POSIXLY_BASIC) + bad_prog (_(UNKNOWN_S_OPT)); + flags |= REG_NEWLINE; + break; + + case 'e': + if (posixicity == POSIXLY_BASIC) + bad_prog (_(UNKNOWN_S_OPT)); + cmd->eval = true; + break; + + case 'p': + if (cmd->print) + bad_prog (_(EXCESS_P_OPT)); + cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */ + break; + + case 'g': + if (cmd->global) + bad_prog (_(EXCESS_G_OPT)); + cmd->global = true; + break; + + case 'w': + cmd->outf = get_openfile (&file_write, write_mode, true); + return flags; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (cmd->numb) + bad_prog (_(EXCESS_N_OPT)); + cmd->numb = in_integer (ch); + if (!cmd->numb) + bad_prog 
(_(ZERO_N_OPT)); + break; + + case CLOSE_BRACE: + case '#': + savchar (ch); + FALLTHROUGH; + case EOF: + case '\n': + case ';': + return flags; + + case '\r': + if (inchar () == '\n') + return flags; + FALLTHROUGH; + + default: + bad_prog (_(UNKNOWN_S_OPT)); + /*NOTREACHED*/ + } +} + +/* read in a label for a `:', `b', or `t' command */ +static char * _GL_ATTRIBUTE_MALLOC +read_label (void) +{ + struct buffer *b; + int ch; + char *ret; + + b = init_buffer (); + ch = in_nonblank (); + + while (ch != EOF && ch != '\n' + && !ISBLANK (ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#') + ch = add_then_next (b, ch); + + savchar (ch); + add1_buffer (b, '\0'); + ret = xstrdup (get_buffer (b)); + free_buffer (b); + return ret; +} + +/* Store a label (or label reference) created by a `:', `b', or `t' + command so that the jump to/from the label can be backpatched after + compilation is complete, or a reference created by a `{' to be + backpatched when the corresponding `}' is found. */ +static struct sed_label * +setup_label (struct sed_label *list, countT idx, char *name, + const struct error_info *err_info) +{ + struct sed_label *ret = OB_MALLOC (&obs, 1, struct sed_label); + ret->v_index = idx; + ret->name = name; + if (err_info) + memcpy (&ret->err_info, err_info, sizeof (ret->err_info)); + ret->next = list; + return ret; +} + +static struct sed_label * +release_label (struct sed_label *list_head) +{ + struct sed_label *ret; + + if (!list_head) + return NULL; + ret = list_head->next; + + free (list_head->name); + +#if 0 + /* We use obstacks */ + free (list_head); +#endif + return ret; +} + +static struct replacement * +new_replacement (char *text, size_t length, enum replacement_types type) +{ + struct replacement *r = OB_MALLOC (&obs, 1, struct replacement); + + r->prefix = text; + r->prefix_length = length; + r->subst_id = -1; + r->repl_type = type; + + /* r-> next = NULL; */ + return r; +} + +static void +setup_replacement (struct subst *sub, const char *text, size_t 
length) +{ + char *base; + char *p; + char *text_end; + enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS; + struct replacement root; + struct replacement *tail; + + sub->max_id = 0; + base = MEMDUP (text, length, char); + length = normalize_text (base, length, TEXT_REPLACEMENT); + + IF_LINT (sub->replacement_buffer = base); + + text_end = base + length; + tail = &root; + + for (p=base; p<text_end; ++p) + { + if (*p == '\\') + { + /* Preceding the backslash may be some literal text: */ + tail = tail->next = + new_replacement (base, (size_t)(p - base), repl_type); + + repl_type = save_type; + + /* Skip the backslash and look for a numeric back-reference, + or a case-munging escape if not in POSIX mode: */ + ++p; + if (p == text_end) + ++tail->prefix_length; + + else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p)) + { + p[-1] = *p; + ++tail->prefix_length; + } + + else + switch (*p) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + tail->subst_id = *p - '0'; + if (sub->max_id < tail->subst_id) + sub->max_id = tail->subst_id; + break; + + case 'L': + repl_type = REPL_LOWERCASE; + save_type = REPL_LOWERCASE; + break; + + case 'U': + repl_type = REPL_UPPERCASE; + save_type = REPL_UPPERCASE; + break; + + case 'E': + repl_type = REPL_ASIS; + save_type = REPL_ASIS; + break; + + case 'l': + save_type = repl_type; + repl_type |= REPL_LOWERCASE_FIRST; + break; + + case 'u': + save_type = repl_type; + repl_type |= REPL_UPPERCASE_FIRST; + break; + + default: + p[-1] = *p; + ++tail->prefix_length; + } + + base = p + 1; + } + else if (*p == '&') + { + /* Preceding the ampersand may be some literal text: */ + tail = tail->next = + new_replacement (base, (size_t)(p - base), repl_type); + + repl_type = save_type; + tail->subst_id = 0; + base = p + 1; + } + } + /* There may be some trailing literal text: */ + if (base < text_end) + tail = tail->next = + new_replacement (base, (size_t)(text_end - base), 
repl_type); + + tail->next = NULL; + sub->replacement = root.next; +} + +static void +read_text (struct text_buf *buf, int leadin_ch) +{ + int ch; + + /* Should we start afresh (as opposed to continue a partial text)? */ + if (buf) + { + if (pending_text) + free_buffer (pending_text); + pending_text = init_buffer (); + buf->text = NULL; + buf->text_length = 0; + old_text_buf = buf; + } + /* assert(old_text_buf != NULL); */ + + if (leadin_ch == EOF) + return; + + if (leadin_ch != '\n') + add1_buffer (pending_text, leadin_ch); + + ch = inchar (); + while (ch != EOF && ch != '\n') + { + if (ch == '\\') + { + ch = inchar (); + if (ch != EOF) + add1_buffer (pending_text, '\\'); + } + + if (ch == EOF) + { + add1_buffer (pending_text, '\n'); + return; + } + + ch = add_then_next (pending_text, ch); + } + + add1_buffer (pending_text, '\n'); + if (!buf) + buf = old_text_buf; + buf->text_length = normalize_text (get_buffer (pending_text), + size_buffer (pending_text), TEXT_BUFFER); + buf->text = MEMDUP (get_buffer (pending_text), buf->text_length, char); + free_buffer (pending_text); + pending_text = NULL; +} + +/* Try to read an address for a sed command. If it succeeds, + return non-zero and store the resulting address in `*addr'. + If the input doesn't look like an address read nothing + and return zero. 
*/
/* CH is the first character of the candidate address, already read by
   the caller.  Recognized forms: /RE/ and \cREc (optionally followed by
   the GNU modifiers I and M), N, N~STEP, +N, ~N and $.  The forms
   using `~' or `+', and the I/M modifiers, are not recognized when
   posixicity is POSIXLY_BASIC.  */
static bool
compile_address (struct addr *addr, int ch)
{
  /* Start from a null address so every field has a defined value.  */
  addr->addr_type = ADDR_IS_NULL;
  addr->addr_step = 0;
  addr->addr_number = ~(countT)0;  /* extremely unlikely to ever match */
  addr->addr_regex = NULL;

  /* Regular expression address.  */
  if (ch == '/' || ch == '\\')
    {
      /* After a backslash the next character is the delimiter.  */
      const int delim = (ch == '\\') ? inchar () : ch;
      struct buffer *pattern;
      int re_flags = 0;

      addr->addr_type = ADDR_IS_REGEX;
      pattern = match_slash (delim, true);
      if (pattern == NULL)
        bad_prog (_(UNTERM_ADDR_RE));

      /* Collect trailing modifiers; the first character that is not a
         modifier is pushed back and ends the address.  */
      for (;;)
        {
          const int modifier = in_nonblank ();
          const bool extensions_ok = (posixicity != POSIXLY_BASIC);

          if (extensions_ok && modifier == 'I')       /* GNU extension */
            re_flags |= REG_ICASE;
          else if (extensions_ok && modifier == 'M')  /* GNU extension */
            re_flags |= REG_NEWLINE;
          else
            {
              savchar (modifier);
              addr->addr_regex = compile_regex (pattern, re_flags, 0);
              free_buffer (pattern);
              return true;
            }
        }
    }

  /* Line number, possibly of the form FIRST~STEP.  */
  if (ISDIGIT (ch))
    {
      int after;

      addr->addr_number = in_integer (ch);
      addr->addr_type = ADDR_IS_NUM;

      after = in_nonblank ();
      if (after == '~' && posixicity != POSIXLY_BASIC)
        {
          const countT step = in_integer (in_nonblank ());

          /* A zero step leaves a plain line-number address.  */
          if (step > 0)
            {
              addr->addr_step = step;
              addr->addr_type = ADDR_IS_NUM_MOD;
            }
        }
      else
        savchar (after);

      return true;
    }

  /* Relative addresses +N and ~N.  */
  if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
    {
      addr->addr_step = in_integer (in_nonblank ());

      /* With a zero step the type stays ADDR_IS_NULL; this forces
         matching to stop on the next line.  */
      if (addr->addr_step != 0)
        addr->addr_type = (ch == '+') ? ADDR_IS_STEP : ADDR_IS_STEP_MOD;

      return true;
    }

  /* Last line.  */
  if (ch == '$')
    {
      addr->addr_type = ADDR_IS_LAST;
      return true;
    }

  /* Not an address: nothing was consumed.  */
  return false;
}

/* Read a program (or a subprogram within `{' `}' pairs) in and store
   the compiled form in `*vector'.  Return a pointer to the new vector.
*/ +static struct vector * +compile_program (struct vector *vector) +{ + struct sed_cmd *cur_cmd; + struct buffer *b; + int ch; + + if (!vector) + { + vector = XCALLOC (1, struct vector); + vector->v = NULL; + vector->v_allocated = 0; + vector->v_length = 0; + + obstack_init (&obs); + } + if (pending_text) + read_text (NULL, '\n'); + + for (;;) + { + struct addr a; + + while ((ch=inchar ()) == ';' || ISSPACE (ch)) + ; + if (ch == EOF) + break; + + cur_cmd = next_cmd_entry (&vector); + if (compile_address (&a, ch)) + { + if (a.addr_type == ADDR_IS_STEP + || a.addr_type == ADDR_IS_STEP_MOD) + bad_prog (_(BAD_STEP)); + + cur_cmd->a1 = MEMDUP (&a, 1, struct addr); + ch = in_nonblank (); + if (ch == ',') + { + if (!compile_address (&a, in_nonblank ())) + bad_prog (_(BAD_COMMA)); + + cur_cmd->a2 = MEMDUP (&a, 1, struct addr); + ch = in_nonblank (); + } + + if ((cur_cmd->a1->addr_type == ADDR_IS_NUM + && cur_cmd->a1->addr_number == 0) + && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX) + || posixicity == POSIXLY_BASIC)) + bad_prog (_(INVALID_LINE_0)); + } + if (ch == '!') + { + cur_cmd->addr_bang = true; + ch = in_nonblank (); + if (ch == '!') + bad_prog (_(BAD_BANG)); + } + + /* Do not accept extended commands in --posix mode. Also, + a few commands only accept one address in that mode. 
*/ + if (posixicity == POSIXLY_BASIC) + switch (ch) + { + case 'e': case 'F': case 'v': case 'z': case 'L': + case 'Q': case 'T': case 'R': case 'W': + bad_command (ch); + FALLTHROUGH; + + case 'a': case 'i': case 'l': + case '=': case 'r': + if (cur_cmd->a2) + bad_prog (_(ONE_ADDR)); + } + + cur_cmd->cmd = ch; + switch (ch) + { + case '#': + if (cur_cmd->a1) + bad_prog (_(NO_SHARP_ADDR)); + ch = inchar (); + if (ch=='n' && first_script && cur_input.line < 2) + if ( (prog.base && prog.cur==2+prog.base) + || (prog.file && !prog.base && 2==ftell (prog.file))) + no_default_output = true; + while (ch != EOF && ch != '\n') + ch = inchar (); + continue; /* restart the for (;;) loop */ + + case 'v': + /* This is an extension. Programs needing GNU sed might start + * with a `v' command so that other seds will stop. + * We compare the version and ignore POSIXLY_CORRECT. + */ + { + char *version = read_label (); + char const *compared_version; + compared_version = (*version == '\0') ? "4.0" : version; + if (strverscmp (compared_version, PACKAGE_VERSION) > 0) + bad_prog (_(ANCIENT_VERSION)); + + free (version); + posixicity = POSIXLY_EXTENDED; + } + continue; + + case '{': + blocks = setup_label (blocks, vector->v_length, NULL, &cur_input); + cur_cmd->addr_bang = !cur_cmd->addr_bang; + break; + + case '}': + if (!blocks) + bad_prog (_(EXCESS_CLOSE_BRACE)); + if (cur_cmd->a1) + bad_prog (_(NO_CLOSE_BRACE_ADDR)); + + read_end_of_cmd (); + + vector->v[blocks->v_index].x.jump_index = vector->v_length; + blocks = release_label (blocks); /* done with this entry */ + break; + + case 'e': + if (sandbox) + bad_prog (_(DISALLOWED_CMD)); + + ch = in_nonblank (); + if (ch == EOF || ch == '\n') + { + cur_cmd->x.cmd_txt.text_length = 0; + break; + } + else + goto read_text_to_slash; + + case 'a': + case 'i': + case 'c': + ch = in_nonblank (); + + read_text_to_slash: + if (ch == EOF) + bad_prog (_(EXPECTED_SLASH)); + + if (ch == '\\') + ch = inchar (); + else + { + if (posixicity == 
POSIXLY_BASIC) + bad_prog (_(EXPECTED_SLASH)); + savchar (ch); + ch = '\n'; + } + + read_text (&cur_cmd->x.cmd_txt, ch); + break; + + case ':': + if (cur_cmd->a1) + bad_prog (_(NO_COLON_ADDR)); + { + char *label = read_label (); + if (!*label) + bad_prog (_(COLON_LACKS_LABEL)); + labels = setup_label (labels, vector->v_length, label, NULL); + + if (debug) + cur_cmd->x.label_name = strdup (label); + } + break; + + case 'T': + case 'b': + case 't': + jumps = setup_label (jumps, vector->v_length, read_label (), NULL); + break; + + case 'Q': + case 'q': + if (cur_cmd->a2) + bad_prog (_(ONE_ADDR)); + FALLTHROUGH; + + case 'L': + case 'l': + ch = in_nonblank (); + if (ISDIGIT (ch) && posixicity != POSIXLY_BASIC) + { + cur_cmd->x.int_arg = in_integer (ch); + } + else + { + cur_cmd->x.int_arg = -1; + savchar (ch); + } + + read_end_of_cmd (); + break; + + case '=': + case 'd': + case 'D': + case 'F': + case 'g': + case 'G': + case 'h': + case 'H': + case 'n': + case 'N': + case 'p': + case 'P': + case 'z': + case 'x': + read_end_of_cmd (); + break; + + case 'r': + b = read_filename (); + if (strlen (get_buffer (b)) == 0) + bad_prog (_(MISSING_FILENAME)); + cur_cmd->x.fname = xstrdup (get_buffer (b)); + free_buffer (b); + break; + + case 'R': + cur_cmd->x.inf = get_openfile (&file_read, read_mode, false); + break; + + case 'W': + case 'w': + cur_cmd->x.outf = get_openfile (&file_write, write_mode, true); + break; + + case 's': + { + struct buffer *b2; + int flags; + int slash; + + slash = inchar (); + if ( !(b = match_slash (slash, true)) ) + bad_prog (_(UNTERM_S_CMD)); + if ( !(b2 = match_slash (slash, false)) ) + bad_prog (_(UNTERM_S_CMD)); + + cur_cmd->x.cmd_subst = OB_MALLOC (&obs, 1, struct subst); + setup_replacement (cur_cmd->x.cmd_subst, + get_buffer (b2), size_buffer (b2)); + free_buffer (b2); + + flags = mark_subst_opts (cur_cmd->x.cmd_subst); + cur_cmd->x.cmd_subst->regx = + compile_regex (b, flags, cur_cmd->x.cmd_subst->max_id + 1); + free_buffer (b); + + if 
(cur_cmd->x.cmd_subst->eval && sandbox) + bad_prog (_(DISALLOWED_CMD)); + } + break; + + case 'y': + { + size_t len, dest_len; + int slash; + struct buffer *b2; + char *src_buf, *dest_buf; + + slash = inchar (); + if ( !(b = match_slash (slash, false)) ) + bad_prog (_(UNTERM_Y_CMD)); + src_buf = get_buffer (b); + len = normalize_text (src_buf, size_buffer (b), TEXT_BUFFER); + + if ( !(b2 = match_slash (slash, false)) ) + bad_prog (_(UNTERM_Y_CMD)); + dest_buf = get_buffer (b2); + dest_len = normalize_text (dest_buf, size_buffer (b2), TEXT_BUFFER); + + if (mb_cur_max > 1) + { + size_t i, j, idx, src_char_num; + size_t *src_lens = XCALLOC (len, size_t); + char **trans_pairs; + size_t mbclen; + mbstate_t cur_stat = { 0, }; + + /* Enumerate how many character the source buffer has. */ + for (i = 0, j = 0; i < len;) + { + mbclen = MBRLEN (src_buf + i, len - i, &cur_stat); + /* An invalid sequence, or a truncated multibyte character. + We treat it as a single-byte character. */ + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 + || mbclen == 0) + mbclen = 1; + src_lens[j++] = mbclen; + i += mbclen; + } + src_char_num = j; + + memset (&cur_stat, 0, sizeof cur_stat); + idx = 0; + + /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL} + src(i) : pointer to i-th source character. + dest(i) : pointer to i-th destination character. + NULL : terminator */ + trans_pairs = XCALLOC (2 * src_char_num + 1, char*); + cur_cmd->x.translatemb = trans_pairs; + for (i = 0; i < src_char_num; i++) + { + if (idx >= dest_len) + bad_prog (_(Y_CMD_LEN)); + + /* Set the i-th source character. */ + trans_pairs[2 * i] = XCALLOC (src_lens[i] + 1, char); + memcpy (trans_pairs[2 * i], src_buf, src_lens[i]); + trans_pairs[2 * i][src_lens[i]] = '\0'; + src_buf += src_lens[i]; /* Forward to next character. */ + + /* Fetch the i-th destination character. */ + mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat); + /* An invalid sequence, or a truncated multibyte character. 
+ We treat it as a single-byte character. */ + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 + || mbclen == 0) + mbclen = 1; + + /* Set the i-th destination character. */ + trans_pairs[2 * i + 1] = XCALLOC (mbclen + 1, char); + memcpy (trans_pairs[2 * i + 1], dest_buf + idx, mbclen); + trans_pairs[2 * i + 1][mbclen] = '\0'; + idx += mbclen; /* Forward to next character. */ + } + trans_pairs[2 * i] = NULL; + if (idx != dest_len) + bad_prog (_(Y_CMD_LEN)); + + IF_LINT (free (src_lens)); + } + else + { + unsigned char *translate = + OB_MALLOC (&obs, YMAP_LENGTH, unsigned char); + unsigned char *ustring = (unsigned char *)src_buf; + + if (len != dest_len) + bad_prog (_(Y_CMD_LEN)); + + for (len = 0; len < YMAP_LENGTH; len++) + translate[len] = len; + + while (dest_len--) + translate[*ustring++] = (unsigned char)*dest_buf++; + + cur_cmd->x.translate = translate; + } + + read_end_of_cmd (); + + free_buffer (b); + free_buffer (b2); + } + break; + + case EOF: + bad_prog (_(NO_COMMAND)); + /*NOTREACHED*/ + + default: + bad_command (ch); + /*NOTREACHED*/ + } + + /* this is buried down here so that "continue" statements will miss it */ + ++vector->v_length; + } + if (posixicity == POSIXLY_BASIC && pending_text) + bad_prog (_(INCOMPLETE_CMD)); + return vector; +} + +/* deal with \X escapes */ +size_t +normalize_text (char *buf, size_t len, enum text_types buftype) +{ + const char *bufend = buf + len; + char *p = buf; + char *q = buf; + char ch; + int base; + + /* This variable prevents normalizing text within bracket + subexpressions when conforming to POSIX. If 0, we + are not within a bracket expression. If -1, we are within a + bracket expression but are not within [.FOO.], [=FOO=], + or [:FOO:]. Otherwise, this is the '.', '=', or ':' + respectively within these three types of subexpressions. 
*/ + int bracket_state = 0; + + int mbclen; + mbstate_t cur_stat = { 0, }; + + while (p < bufend) + { + mbclen = MBRLEN (p, bufend - p, &cur_stat); + if (mbclen != 1) + { + /* An invalid sequence, or a truncated multibyte character. + We treat it as a single-byte character. */ + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) + mbclen = 1; + + memmove (q, p, mbclen); + q += mbclen; + p += mbclen; + continue; + } + + if (*p == '\\' && p+1 < bufend && bracket_state == 0) + switch (*++p) + { +#if defined __STDC__ && __STDC__-0 + case 'a': *q++ = '\a'; p++; continue; +#else /* Not STDC; we'll just assume ASCII */ + case 'a': *q++ = '\007'; p++; continue; +#endif + /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */ + case 'f': *q++ = '\f'; p++; continue; + case '\n': /*fall through */ + case 'n': *q++ = '\n'; p++; continue; + case 'r': *q++ = '\r'; p++; continue; + case 't': *q++ = '\t'; p++; continue; + case 'v': *q++ = '\v'; p++; continue; + + case 'd': /* decimal byte */ + base = 10; + goto convert; + + case 'x': /* hexadecimal byte */ + base = 16; + goto convert; + + case 'o': /* octal byte */ + base = 8; +convert: + p = convert_number (&ch, p, bufend, base); + + /* for an ampersand in a replacement, pass the \ up one level */ + if (buftype == TEXT_REPLACEMENT && (ch == '&' || ch == '\\')) + *q++ = '\\'; + *q++ = ch; + continue; + + case 'c': + if (++p < bufend) + { + *q++ = toupper ((unsigned char) *p) ^ 0x40; + if (*p == '\\') + { + p++; + if (*p != '\\') + bad_prog (RECURSIVE_ESCAPE_C); + } + p++; + continue; + } + else + { + /* we just pass the \ up one level for interpretation */ + if (buftype != TEXT_BUFFER) + *q++ = '\\'; + continue; + } + + default: + /* we just pass the \ up one level for interpretation */ + if (buftype != TEXT_BUFFER) + *q++ = '\\'; + break; + } + else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED) + switch (*p) + { + case '[': + if (!bracket_state) + bracket_state = -1; + break; + + case 
':': + case '.': + case '=': + if (bracket_state == -1 && p[-1] == '[') + bracket_state = *p; + break; + + case ']': + if (bracket_state == 0) + ; + else if (bracket_state == -1) + bracket_state = 0; + else if (p[-2] != bracket_state && p[-1] == bracket_state) + bracket_state = -1; + break; + } + + *q++ = *p++; + } + return (size_t)(q - buf); +} + + +/* `str' is a string (from the command line) that contains a sed command. + Compile the command, and add it to the end of `cur_program'. */ +struct vector * +compile_string (struct vector *cur_program, char *str, size_t len) +{ + static countT string_expr_count = 0; + struct vector *ret; + + prog.file = NULL; + prog.base = (unsigned char *)str; + prog.cur = prog.base; + prog.end = prog.cur + len; + + cur_input.line = 0; + cur_input.name = NULL; + cur_input.string_expr_count = ++string_expr_count; + + ret = compile_program (cur_program); + prog.base = NULL; + prog.cur = NULL; + prog.end = NULL; + + first_script = false; + return ret; +} + +/* `cmdfile' is the name of a file containing sed commands. + Read them in and add them to the end of `cur_program'. 
+ */ +struct vector * +compile_file (struct vector *cur_program, const char *cmdfile) +{ + struct vector *ret; + + prog.file = stdin; + if (cmdfile[0] != '-' || cmdfile[1] != '\0') + { +#ifdef HAVE_FOPEN_RT + prog.file = ck_fopen (cmdfile, "rt", true); +#else + prog.file = ck_fopen (cmdfile, "r", true); +#endif + } + + cur_input.line = 1; + cur_input.name = cmdfile; + cur_input.string_expr_count = 0; + + ret = compile_program (cur_program); + if (prog.file != stdin) + ck_fclose (prog.file); + prog.file = NULL; + + first_script = false; + return ret; +} + +static void +cleanup_program_filenames (void) +{ + { + struct output *p; + + for (p = file_read; p; p = p->link) + if (p->name) + { + free (p->name); + p->name = NULL; + } + + for (p = file_write; p; p = p->link) + if (p->name) + { + free (p->name); + p->name = NULL; + } + } +} + +/* Make any checks which require the whole program to have been read. + In particular: this backpatches the jump targets. + Any cleanup which can be done after these checks is done here also. */ +void +check_final_program (struct vector *program) +{ + struct sed_label *go; + struct sed_label *lbl; + + /* do all "{"s have a corresponding "}"? */ + if (blocks) + { + /* update info for error reporting: */ + memcpy (&cur_input, &blocks->err_info, sizeof (cur_input)); + bad_prog (_(EXCESS_OPEN_BRACE)); + } + + /* was the final command an unterminated a/c/i command? 
*/ + if (pending_text) + { + old_text_buf->text_length = size_buffer (pending_text); + if (old_text_buf->text_length) + old_text_buf->text = MEMDUP (get_buffer (pending_text), + old_text_buf->text_length, char); + free_buffer (pending_text); + pending_text = NULL; + } + + for (go = jumps; go; go = release_label (go)) + { + for (lbl = labels; lbl; lbl = lbl->next) + if (strcmp (lbl->name, go->name) == 0) + break; + if (lbl) + { + program->v[go->v_index].x.jump_index = lbl->v_index; + } + else + { + if (*go->name) + panic (_("can't find label for jump to `%s'"), go->name); + program->v[go->v_index].x.jump_index = program->v_length; + } + } + jumps = NULL; + + for (lbl = labels; lbl; lbl = release_label (lbl)) + ; + labels = NULL; +} + + +/* Rewind all resources which were allocated in this module. */ +void +rewind_read_files (void) +{ + struct output *p; + + for (p=file_read; p; p=p->link) + if (p->fp) + rewind (p->fp); +} + +/* Release all resources which were allocated in this module. */ +void +finish_program (struct vector *program) +{ + cleanup_program_filenames (); + + /* close all files... */ + { + struct output *p, *q; + + for (p=file_read; p; p=q) + { + if (p->fp) + ck_fclose (p->fp); + q = p->link; +#if 0 + /* We use obstacks. */ + free (p); +#endif + } + + for (p=file_write; p; p=q) + { + if (p->fp) + ck_fclose (p->fp); + q = p->link; +#if 0 + /* We use obstacks. 
*/ + free (p); +#endif + } + file_read = file_write = NULL; + } + +#ifdef lint + for (int i = 0; i < program->v_length; ++i) + { + const struct sed_cmd *sc = &program->v[i]; + + if (sc->a1 && sc->a1->addr_regex) + release_regex (sc->a1->addr_regex); + if (sc->a2 && sc->a2->addr_regex) + release_regex (sc->a2->addr_regex); + + switch (sc->cmd) + { + case 's': + free (sc->x.cmd_subst->replacement_buffer); + if (sc->x.cmd_subst->regx) + release_regex (sc->x.cmd_subst->regx); + break; + } + } + + obstack_free (&obs, NULL); +#else + (void)program; +#endif /* lint */ + +} diff --git a/sed/debug.c b/sed/debug.c new file mode 100644 index 0000000..9ec37b6 --- /dev/null +++ b/sed/debug.c @@ -0,0 +1,456 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +/* Written by Assaf Gordon. 
*/ + +/* debug.c: debugging functions */ + +#include "sed.h" +#include "basicdefs.h" +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <assert.h> + +/* indentation level when printing the program */ +static int block_level = 0; + + +void +debug_print_char (char c) +{ + if (ISPRINT (c) && c != '\\') + { + putchar (c); + return; + } + + putchar ('\\'); + switch (c) + { + case '\a': + putchar ('a'); + break; + case '\f': + putchar ('f'); + break; + case '\r': + putchar ('r'); + break; + case '\t': + putchar ('t'); + break; + case '\v': + putchar ('v'); + break; + case '\n': + putchar ('n'); + break; + case '\\': + putchar ('\\'); + break; + + default: + printf ("o%03o", (unsigned int) c); + } +} + +static void +debug_print_regex_pattern (const char *pat, size_t len) +{ + const char *p = pat; + while (len--) + { + if (*p == '/') + fputs ("\\/", stdout); + else + debug_print_char (*p); + ++p; + } +} + +static void +debug_print_regex_flags (const struct regex *r, bool addr) +{ + if (!r) + return; + +#ifdef REG_PERL + if (r->flags & REG_DOTALL) /* REG_PERL */ + putchar ('s'); + if (r->flags & REG_EXTENDED) /* REG_PERL */ + putchar ('x'); +#endif + + if (r->flags & REG_ICASE) + putchar (addr ? 'I' : 'i'); + if (r->flags & REG_NEWLINE) + putchar (addr ? 
'M' : 'm'); +} + +static void +debug_print_regex (const struct regex *r) +{ + if (!r) + { + /* Previous Regex */ + fputs ("//", stdout); + return; + } + + putchar ('/'); + debug_print_regex_pattern (r->re, r->sz); + putchar ('/'); +} + +static void +debug_print_addr (const struct addr *a) +{ + if (!a) + return; + switch (a->addr_type) + { + case ADDR_IS_NULL: + fputs ("[ADDR-NULL]", stdout); + break; + case ADDR_IS_REGEX: + debug_print_regex (a->addr_regex); + debug_print_regex_flags (a->addr_regex, true); + break; + case ADDR_IS_NUM: + printf ("%lu", a->addr_number); + break; + case ADDR_IS_NUM_MOD: + printf ("%lu~%lu", a->addr_number, a->addr_step); + break; + case ADDR_IS_STEP: + printf ("+%lu", a->addr_step); + break; + case ADDR_IS_STEP_MOD: + printf ("~%lu", a->addr_step); + break; + case ADDR_IS_LAST: + putchar ('$'); + break; + } +} + +static void +debug_print_subst_replacement (const struct replacement *r) +{ + enum replacement_types last_repl_type = REPL_ASIS; + + if (!r) + return; + + const struct replacement *p = r; + while (p) + { + if (p->repl_type != last_repl_type) + { + /* Special GNU replacements \E\U\u\L\l should be printed + BEFORE the 'prefix' .... the 'prefix' refers to being + before the backreference. 
*/ + putchar ('\\'); + if (p->repl_type == 0) + putchar ('E'); + else if (p->repl_type == REPL_UPPERCASE) + putchar ('U'); + else if (p->repl_type == REPL_LOWERCASE) + putchar ('L'); + else if ((p->repl_type & REPL_MODIFIERS) == REPL_UPPERCASE_FIRST) + putchar ('u'); + else if ((p->repl_type & REPL_MODIFIERS) == REPL_LOWERCASE_FIRST) + putchar ('l'); + + last_repl_type = p->repl_type; + } + + if (p->prefix_length) + fwrite (p->prefix, 1, p->prefix_length, stdout); + + if (p->subst_id != -1) + { + if (p->subst_id == 0) + putchar ('&'); + else + printf ("\\%d", p->subst_id); + } + + p = p->next; + } +} + +static void +debug_print_output_file (const struct output *o) +{ + if (!o) + return; + + fputs (o->name, stdout); +} + +static void +debug_print_subst (const struct subst *s) +{ + if (!s) + return; + + debug_print_regex (s->regx); + debug_print_subst_replacement (s->replacement); + putchar ('/'); + + debug_print_regex_flags (s->regx, false); + + if (s->global) + putchar ('g'); + if (s->eval) + putchar ('e'); + if (s->print) + putchar ('p'); + if (s->numb) + printf ("%lu", s->numb); + if (s->outf) + { + putchar ('w'); + debug_print_output_file (s->outf); + } +} + +static void +debug_print_translation (const struct sed_cmd *sc) +{ + unsigned int i; + + if (mb_cur_max > 1) + { + /* multibyte translation */ + putchar ('/'); + for (i = 0; sc->x.translatemb[2 * i] != NULL; i++) + fputs (sc->x.translatemb[2 * i], stdout); + putchar ('/'); + for (i = 0; sc->x.translatemb[2 * i] != NULL; i++) + fputs (sc->x.translatemb[2 * i + 1], stdout); + putchar ('/'); + } + else + { + /* unibyte translation */ + putchar ('/'); + for (i = 0; i < 256; ++i) + if (sc->x.translate[i] != (unsigned char) i) + putchar ((unsigned char) i); + putchar ('/'); + for (i = 0; i < 256; ++i) + if (sc->x.translate[i] != (unsigned char) i) + putchar (sc->x.translate[i]); + putchar ('/'); + } +} + +static void +debug_print_function (const struct vector *program, const struct sed_cmd *sc) +{ + if (!sc) + 
return; + + putchar (sc->cmd); + + switch (sc->cmd) /* LCOV_EXCL_BR */ + { + case '=': + break; + + case ':': + printf ("%s", sc->x.label_name); + break; + + case '{': + break; + + case '}': + break; + + case '#': /* LCOV_EXCL_LINE */ + /* should not happen - discarded during compilation. */ + assert (0); /* LCOV_EXCL_LINE */ + + case 'a': + case 'c': + case 'i': + fputs ("\\", stdout); + if (sc->x.cmd_txt.text_length) + fwrite (sc->x.cmd_txt.text, 1, sc->x.cmd_txt.text_length, stdout); + break; + + case 'b': + case 't': + case 'T': + { + if (sc->x.jump_index < program->v_length) + { + const char *label_name = program->v[sc->x.jump_index].x.label_name; + if (label_name) + printf (" %s", label_name); + } + } + break; + + case 'D': + break; + + case 'd': + break; + + case 'e': + putchar (' '); + fwrite (sc->x.cmd_txt.text, 1, sc->x.cmd_txt.text_length, stdout); + break; + + case 'F': + break; + + case 'g': + break; + + case 'G': + break; + + case 'h': + break; + + case 'H': + break; + + /* 'i' is lumped above with 'a' and 'c' */ + + case 'L': + case 'l': + case 'q': + case 'Q': + if (sc->x.int_arg != -1) + printf (" %d", sc->x.int_arg); + break; + + case 'n': + break; + + case 'N': + break; + + case 'P': + break; + + case 'p': + break; + + /* 'q','Q' are lumped above with 'L' and 'l' */ + + case 'r': + putchar (' '); + fputs (sc->x.fname, stdout); + break; + + case 'R': + putchar (' '); + fputs (sc->x.inf->name, stdout); + break; + + case 's': + debug_print_subst (sc->x.cmd_subst); + break; + + /* 't','T' are lumped above with 'b' */ + + case 'v': /* LCOV_EXCL_LINE */ + /* should not happen - handled during compilation then discarded. 
*/ + assert (0); /* LCOV_EXCL_LINE */ + + case 'W': + debug_print_output_file (sc->x.outf); + break; + + case 'w': + debug_print_output_file (sc->x.outf); + break; + + case 'x': + break; + + case 'y': + debug_print_translation (sc); + break; + + case 'z': + break; + + default: /* LCOV_EXCL_LINE */ + /* should not happen - unless missed a sed command. */ + assert (0); /* LCOV_EXCL_LINE */ + } +} + +void +debug_print_command (const struct vector *program, const struct sed_cmd *sc) +{ + bool addr_bang; + if (!program) + return; + + if (sc->cmd == '}') + --block_level; + + for (int j = 0; j < block_level; ++j) + fputs (" ", stdout); + + debug_print_addr (sc->a1); + if (sc->a2) + putchar (','); + debug_print_addr (sc->a2); + + addr_bang = sc->addr_bang; + /* Implmentation detail: GNU Sed implements beginning of block + by negating the matched address and jumping if there's no match. */ + if (sc->cmd == '{') + addr_bang = !addr_bang; + if (addr_bang) + putchar ('!'); + + if (sc->a1 || sc->a2) + putchar (' '); + + debug_print_function (program, sc); + + putchar ('\n'); + + if (sc->cmd == '{') + ++block_level; +} + +void +debug_print_program (const struct vector *program) +{ + if (!program) + return; + + block_level = 1; + puts ("SED PROGRAM:"); + for (size_t i = 0; i < program->v_length; ++i) + debug_print_command (program, &program->v[i]); + block_level = 0; +} diff --git a/sed/execute.c b/sed/execute.c new file mode 100644 index 0000000..b39bab4 --- /dev/null +++ b/sed/execute.c @@ -0,0 +1,1705 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#define INITIAL_BUFFER_SIZE 50 +#define FREAD_BUFFER_SIZE 8192 + +#include "sed.h" + +#include <stddef.h> +#include <stdio.h> +#include <ctype.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "stat-macros.h" + +#include <selinux/selinux.h> +#include <selinux/context.h> +#include "acl.h" +#include "ignore-value.h" +#include "progname.h" +#include "xalloc.h" + +/* The number of extra bytes that must be allocated/usable, beyond + the declared "end" of each line buffer that may be passed to + match_regex. This is imposed by its use of dfaexec. */ +#define DFA_SLOP 1 + +/* Sed operates a line at a time. */ +struct line { + char *text; /* Pointer to line allocated by malloc. */ + char *active; /* Pointer to non-consumed part of text. */ + size_t length; /* Length of text (or active, if used). */ + size_t alloc; /* Allocated space for active. */ + bool chomped; /* Was a trailing newline dropped? */ + mbstate_t mbstate; +}; + +#define SIZEOF_LINE offsetof (struct line, mbstate) + +/* A queue of text to write out at the end of a cycle + (filled by the "a", "r" and "R" commands.) */ +struct append_queue { + const char *fname; + char *text; + size_t textlen; + struct append_queue *next; + bool free; +}; + +/* State information for the input stream. */ +struct input { + /* The list of yet-to-be-opened files. It is invalid for file_list + to be NULL. When *file_list is NULL we are currently processing + the last file. */ + + char **file_list; + + /* Count of files we failed to open. 
*/ + countT bad_count; + + /* Current input line number (over all files). */ + countT line_number; + + /* True if we'll reset line numbers and addresses before + starting to process the next (possibly the first) file. */ + bool reset_at_next_file; + + /* Function to read one line. If FP is NULL, read_fn better not + be one which uses fp; in particular, read_always_fail() is + recommended. */ + bool (*read_fn) (struct input *); /* read one line */ + + char *out_file_name; + + const char *in_file_name; + + /* Owner and mode to be set just before closing the file. */ + struct stat st; + + /* if NULL, none of the following are valid */ + FILE *fp; + + bool no_buffering; +}; + + +/* Have we done any replacements lately? This is used by the `t' command. */ +static bool replaced = false; + +/* The current output file (stdout if -i is not being used. */ +static struct output output_file; + +/* The `current' input line. */ +static struct line line; + +/* An input line used to accumulate the result of the s and e commands. */ +static struct line s_accum; + +/* An input line that's been stored by later use by the program */ +static struct line hold; + +/* The buffered input look-ahead. The only field that should be + used outside of read_mem_line() or line_init() is buffer.length. */ +static struct line buffer; + +static struct append_queue *append_head = NULL; +static struct append_queue *append_tail = NULL; + +/* increase a struct line's length, making some attempt at + keeping realloc() calls under control by padding for future growth. */ +static void +resize_line (struct line *lb, size_t len) +{ + int inactive; + inactive = lb->active - lb->text; + + /* If the inactive part has got to more than two thirds of the buffer, + * remove it. 
*/ + if (inactive > lb->alloc * 2) + { + memmove (lb->text, lb->active, lb->length); + lb->alloc += lb->active - lb->text; + lb->active = lb->text; + inactive = 0; + + if (lb->alloc > len) + return; + } + + lb->alloc *= 2; + if (lb->alloc < len) + lb->alloc = len; + if (lb->alloc < INITIAL_BUFFER_SIZE) + lb->alloc = INITIAL_BUFFER_SIZE; + + lb->text = REALLOC (lb->text, inactive + lb->alloc + DFA_SLOP, char); + lb->active = lb->text + inactive; +} + +/* Append LENGTH bytes from STRING to the line, TO. */ +static void +str_append (struct line *to, const char *string, size_t length) +{ + size_t new_length = to->length + length; + + if (to->alloc < new_length) + resize_line (to, new_length); + memcpy (to->active + to->length, string, length); + to->length = new_length; + + if (mb_cur_max > 1 && !is_utf8) + while (length) + { + size_t n = MBRLEN (string, length, &to->mbstate); + + /* Treat an invalid or incomplete sequence like a + single-byte character. */ + if (n == (size_t) -1 || n == (size_t) -2) + { + memset (&to->mbstate, 0, sizeof (to->mbstate)); + n = 1; + } + + if (n == 0) + break; + + string += n; + length -= n; + } +} + +static void +str_append_modified (struct line *to, const char *string, size_t length, + enum replacement_types type) +{ + mbstate_t from_stat; + + if (type == REPL_ASIS) + { + str_append (to, string, length); + return; + } + + if (to->alloc - to->length < length * mb_cur_max) + resize_line (to, to->length + length * mb_cur_max); + + memcpy (&from_stat, &to->mbstate, sizeof (mbstate_t)); + while (length) + { + wchar_t wc; + size_t n = MBRTOWC (&wc, string, length, &from_stat); + + /* Treat an invalid sequence like a single-byte character. 
*/ + if (n == (size_t) -1) + { + type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); + if (type == REPL_ASIS) + { + str_append (to, string, length); + return; + } + + str_append (to, string, 1); + memset (&to->mbstate, 0, sizeof (from_stat)); + n = 1; + string += n, length -= n; + continue; + } + + if (n == 0 || n == (size_t) -2) + { + /* L'\0' or an incomplete sequence: copy it manually. */ + str_append (to, string, length); + return; + } + + string += n, length -= n; + + /* Convert the first character specially... */ + if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST)) + { + if (type & REPL_UPPERCASE_FIRST) + wc = towupper (wc); + else + wc = towlower (wc); + + type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); + if (type == REPL_ASIS) + { + /* Copy the new wide character to the end of the string. */ + n = WCRTOMB (to->active + to->length, wc, &to->mbstate); + to->length += n; + if (n == (size_t) -1 || n == (size_t) -2) + { + fprintf (stderr, + _("case conversion produced an invalid character")); + abort (); + } + str_append (to, string, length); + return; + } + } + else if (type & REPL_UPPERCASE) + wc = towupper (wc); + else + wc = towlower (wc); + + /* Copy the new wide character to the end of the string. */ + n = WCRTOMB (to->active + to->length, wc, &to->mbstate); + to->length += n; + if (n == -1 || n == -2) + { + fprintf (stderr, _("case conversion produced an invalid character")); + abort (); + } + } +} + +/* Initialize a "struct line" buffer. Copy multibyte state from `state' + if not null. */ +static void +line_init (struct line *buf, struct line *state, size_t initial_size) +{ + buf->text = XCALLOC (initial_size + DFA_SLOP, char); + buf->active = buf->text; + buf->alloc = initial_size; + buf->length = 0; + buf->chomped = true; + + if (state) + memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); + else + memset (&buf->mbstate, 0, sizeof (buf->mbstate)); +} + +/* Reset a "struct line" buffer to length zero. 
Copy multibyte state from + `state' if not null. */ +static void +line_reset (struct line *buf, struct line *state) +{ + if (buf->alloc == 0) + line_init (buf, state, INITIAL_BUFFER_SIZE); + else + { + buf->length = 0; + if (state) + memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); + else + memset (&buf->mbstate, 0, sizeof (buf->mbstate)); + } +} + +/* Copy the contents of the line `from' into the line `to'. + This destroys the old contents of `to'. + Copy the multibyte state if `state' is true. */ +static void +line_copy (struct line *from, struct line *to, int state) +{ + /* Remove the inactive portion in the destination buffer. */ + to->alloc += to->active - to->text; + + if (to->alloc < from->length) + { + to->alloc *= 2; + if (to->alloc < from->length) + to->alloc = from->length; + if (to->alloc < INITIAL_BUFFER_SIZE) + to->alloc = INITIAL_BUFFER_SIZE; + /* Use free()+MALLOC() instead of REALLOC() to + avoid unnecessary copying of old text. */ + free (to->text); + to->text = XCALLOC (to->alloc + DFA_SLOP, char); + } + + to->active = to->text; + to->length = from->length; + to->chomped = from->chomped; + memcpy (to->active, from->active, from->length); + + if (state) + memcpy (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); +} + +/* Append the contents of the line `from' to the line `to'. + Copy the multibyte state if `state' is true. */ +static void +line_append (struct line *from, struct line *to, int state) +{ + str_append (to, &buffer_delimiter, 1); + str_append (to, from->active, from->length); + to->chomped = from->chomped; + + if (state) + memcpy (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); +} + +/* Exchange two "struct line" buffers. + Copy the multibyte state if `state' is true. 
*/ +static void +line_exchange (struct line *a, struct line *b, int state) +{ + struct line t; + + if (state) + { + memcpy (&t, a, sizeof (struct line)); + memcpy ( a, b, sizeof (struct line)); + memcpy ( b, &t, sizeof (struct line)); + } + else + { + memcpy (&t, a, SIZEOF_LINE); + memcpy ( a, b, SIZEOF_LINE); + memcpy ( b, &t, SIZEOF_LINE); + } +} + +/* dummy function to simplify read_pattern_space() */ +static bool +read_always_fail (struct input *input _GL_UNUSED) +{ + return false; +} + +static bool +read_file_line (struct input *input) +{ + static char *b; + static size_t blen; + + long result = ck_getdelim (&b, &blen, buffer_delimiter, input->fp); + if (result <= 0) + return false; + + /* Remove the trailing new-line that is left by getline. */ + if (b[result - 1] == buffer_delimiter) + --result; + else + line.chomped = false; + + str_append (&line, b, result); + return true; +} + +static inline void +output_missing_newline (struct output *outf) +{ + if (outf->missing_newline) + { + ck_fwrite (&buffer_delimiter, 1, 1, outf->fp); + outf->missing_newline = false; + } +} + +static inline void +flush_output (FILE *fp) +{ + if (unbuffered) + ck_fflush (fp); +} + +static void +output_line (const char *text, size_t length, int nl, struct output *outf) +{ + if (!text) + return; + + output_missing_newline (outf); + if (length) + ck_fwrite (text, 1, length, outf->fp); + if (nl) + ck_fwrite (&buffer_delimiter, 1, 1, outf->fp); + else + outf->missing_newline = true; + + flush_output (outf->fp); +} + +static struct append_queue * +next_append_slot (void) +{ + struct append_queue *n = XCALLOC (1, struct append_queue); + + n->fname = NULL; + n->text = NULL; + n->textlen = 0; + n->next = NULL; + n->free = false; + + if (append_tail) + append_tail->next = n; + else + append_head = n; + return append_tail = n; +} + +static void +release_append_queue (void) +{ + struct append_queue *p, *q; + + for (p=append_head; p; p=q) + { + if (p->free) + free (p->text); + + q = p->next; + 
free (p); + } + append_head = append_tail = NULL; +} + +static void +dump_append_queue (void) +{ + struct append_queue *p; + + output_missing_newline (&output_file); + for (p=append_head; p; p=p->next) + { + if (p->text) + ck_fwrite (p->text, 1, p->textlen, output_file.fp); + + if (p->fname) + { + char buf[FREAD_BUFFER_SIZE]; + size_t cnt; + FILE *fp; + + /* "If _fname_ does not exist or cannot be read, it shall + be treated as if it were an empty file, causing no error + condition." IEEE Std 1003.2-1992 + So, don't fail. */ + fp = ck_fopen (p->fname, read_mode, false); + if (fp) + { + while ((cnt = ck_fread (buf, 1, sizeof buf, fp)) > 0) + ck_fwrite (buf, 1, cnt, output_file.fp); + ck_fclose (fp); + } + } + } + + flush_output (output_file.fp); + release_append_queue (); +} + +/* Compute the name of the backup file for in-place editing */ +static char * +get_backup_file_name (const char *name) +{ + char *old_asterisk, *asterisk, *backup, *p; + int name_length = strlen (name), backup_length = strlen (in_place_extension); + + /* Compute the length of the backup file */ + for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; + (asterisk = strchr (old_asterisk, '*')); + old_asterisk = asterisk + 1) + backup_length += name_length - 1; + + p = backup = xmalloc (backup_length + 1); + + /* Each iteration gobbles up to an asterisk */ + for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; + (asterisk = strchr (old_asterisk, '*')); + old_asterisk = asterisk + 1) + { + memcpy (p, old_asterisk, asterisk - old_asterisk); + p += asterisk - old_asterisk; + strcpy (p, name); + p += name_length; + } + + /* Tack on what's after the last asterisk */ + strcpy (p, old_asterisk); + return backup; +} + +/* Initialize a struct input for the named file. 
*/ +static void +open_next_file (const char *name, struct input *input) +{ + buffer.length = 0; + + input->in_file_name = name; + if (name[0] == '-' && name[1] == '\0' && !in_place_extension) + { + clearerr (stdin); /* clear any stale EOF indication */ +#if defined WIN32 || defined _WIN32 || defined __CYGWIN__ \ + || defined MSDOS || defined __EMX__ + input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false); +#else + input->fp = stdin; +#endif + } + else + { + if (follow_symlinks) + input->in_file_name = follow_symlink (name); + + if ( ! (input->fp = ck_fopen (name, read_mode, false)) ) + { + const char *ptr = strerror (errno); + fprintf (stderr, _("%s: can't read %s: %s\n"), program_name, + name, ptr); + input->read_fn = read_always_fail; /* a redundancy */ + ++input->bad_count; + return; + } + } + + input->read_fn = read_file_line; + + if (in_place_extension) + { + int input_fd; + char *tmpdir, *p; + security_context_t old_fscreatecon; + int reset_fscreatecon = 0; + memset (&old_fscreatecon, 0, sizeof (old_fscreatecon)); + + /* get the base name */ + tmpdir = xstrdup (input->in_file_name); + if ((p = strrchr (tmpdir, '/'))) + *p = 0; + else + strcpy (tmpdir, "."); + + if (isatty (fileno (input->fp))) + panic (_("couldn't edit %s: is a terminal"), input->in_file_name); + + input_fd = fileno (input->fp); + fstat (input_fd, &input->st); + if (!S_ISREG (input->st.st_mode)) + panic (_("couldn't edit %s: not a regular file"), input->in_file_name); + + if (is_selinux_enabled () > 0) + { + security_context_t con; + if (lgetfilecon (input->in_file_name, &con) != -1) + { + /* Save and restore the old context for the sake of w and W + commands. 
*/ + reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0; + if (setfscreatecon (con) < 0) + fprintf (stderr, _("%s: warning: failed to set default" \ + " file creation context to %s: %s"), + program_name, con, strerror (errno)); + freecon (con); + } + else + { + if (errno != ENOSYS) + fprintf (stderr, _("%s: warning: failed to get" \ + " security context of %s: %s"), + program_name, input->in_file_name, strerror (errno)); + } + } + + output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed", + write_mode); + register_cleanup_file (input->out_file_name); + output_file.missing_newline = false; + free (tmpdir); + + if (reset_fscreatecon) + { + setfscreatecon (old_fscreatecon); + freecon (old_fscreatecon); + } + + if (!output_file.fp) + panic (_("couldn't open temporary file %s: %s"), input->out_file_name, + strerror (errno)); + } + else + { + if (input->fp && unbuffered) + setvbuf (input->fp, NULL, _IONBF, 0); + output_file.fp = stdout; + } +} + + +/* Clean up an input stream that we are done with. */ +static void +closedown (struct input *input) +{ + input->read_fn = read_always_fail; + if (!input->fp) + return; + + if (in_place_extension && output_file.fp != NULL) + { + const char *target_name; + int input_fd, output_fd; + + target_name = input->in_file_name; + input_fd = fileno (input->fp); + output_fd = fileno (output_file.fp); +#ifdef HAVE_FCHOWN + /* Try to set both UID and GID, but if that fails, + try to set only the GID. Ignore failure. 
*/ + if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1) + ignore_value (fchown (output_fd, -1, input->st.st_gid)); +#endif + copy_acl (input->in_file_name, input_fd, + input->out_file_name, output_fd, + input->st.st_mode); + + ck_fclose (input->fp); + ck_fclose (output_file.fp); + if (strcmp (in_place_extension, "*") != 0) + { + char *backup_file_name = get_backup_file_name (target_name); + ck_rename (target_name, backup_file_name, input->out_file_name); + free (backup_file_name); + } + + ck_rename (input->out_file_name, target_name, input->out_file_name); + cancel_cleanup (); + free (input->out_file_name); + } + else + ck_fclose (input->fp); + + input->fp = NULL; +} + +/* Reset range commands so that they are marked as non-matching */ +static void +reset_addresses (struct vector *vec) +{ + struct sed_cmd *cur_cmd; + int n; + + for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++) + if (cur_cmd->a1 + && cur_cmd->a1->addr_type == ADDR_IS_NUM + && cur_cmd->a1->addr_number == 0) + cur_cmd->range_state = RANGE_ACTIVE; + else + cur_cmd->range_state = RANGE_INACTIVE; +} + +/* Read in the next line of input, and store it in the pattern space. + Return zero if there is nothing left to input. */ +static bool +read_pattern_space (struct input *input, struct vector *the_program, int append) +{ + if (append_head) /* redundant test to optimize for common case */ + dump_append_queue (); + replaced = false; + if (!append) + line.length = 0; + line.chomped = true; /* default, until proved otherwise */ + + while ( ! (*input->read_fn)(input) ) + { + closedown (input); + + if (!*input->file_list) + return false; + + if (input->reset_at_next_file) + { + input->line_number = 0; + hold.length = 0; + reset_addresses (the_program); + rewind_read_files (); + + /* If doing in-place editing, we will never append the + new-line to this file; but if the output goes to stdout, + we might still have to output the missing new-line. 
*/ + if (in_place_extension) + output_file.missing_newline = false; + + input->reset_at_next_file = separate_files; + } + + open_next_file (*input->file_list++, input); + } + + ++input->line_number; + return true; +} + +static bool +last_file_with_data_p (struct input *input) +{ + for (;;) + { + int ch; + + closedown (input); + if (!*input->file_list) + return true; + open_next_file (*input->file_list++, input); + if (input->fp) + { + if ((ch = getc (input->fp)) != EOF) + { + ungetc (ch, input->fp); + return false; + } + } + } +} + +/* Determine if we match the `$' address. */ +static bool +test_eof (struct input *input) +{ + int ch; + + if (buffer.length) + return false; + if (!input->fp) + return separate_files || last_file_with_data_p (input); + if (feof (input->fp)) + return separate_files || last_file_with_data_p (input); + if ((ch = getc (input->fp)) == EOF) + return separate_files || last_file_with_data_p (input); + ungetc (ch, input->fp); + return false; +} + +/* Return non-zero if the current line matches the address + pointed to by `addr'. 
*/ +static bool +match_an_address_p (struct addr *addr, struct input *input) +{ + switch (addr->addr_type) + { + case ADDR_IS_NULL: + return true; + + case ADDR_IS_REGEX: + return match_regex (addr->addr_regex, line.active, line.length, 0, + NULL, 0); + + case ADDR_IS_NUM_MOD: + return (input->line_number >= addr->addr_number + && ((input->line_number - addr->addr_number) + % addr->addr_step) == 0); + + case ADDR_IS_STEP: + case ADDR_IS_STEP_MOD: + /* reminder: these are only meaningful for a2 addresses */ + /* a2->addr_number needs to be recomputed each time a1 address + matches for the step and step_mod types */ + return (addr->addr_number <= input->line_number); + + case ADDR_IS_LAST: + return test_eof (input); + + case ADDR_IS_NUM: + /* reminder: these are only meaningful for a1 addresses */ + return (addr->addr_number == input->line_number); + + default: + panic ("INTERNAL ERROR: bad address type"); + } + /*NOTREACHED*/ + return false; +} + +/* return non-zero if current address is valid for cmd */ +static bool +match_address_p (struct sed_cmd *cmd, struct input *input) +{ + if (!cmd->a1) + return true; + + if (cmd->range_state != RANGE_ACTIVE) + { + if (!cmd->a2) + return match_an_address_p (cmd->a1, input); + + /* Find if we are going to activate a range. Handle ADDR_IS_NUM + specially: it represent an "absolute" state, it should not + be computed like regexes. */ + if (cmd->a1->addr_type == ADDR_IS_NUM) + { + if (cmd->range_state == RANGE_CLOSED + || input->line_number < cmd->a1->addr_number) + return false; + } + else + { + if (!match_an_address_p (cmd->a1, input)) + return false; + } + + /* Ok, start a new range. */ + cmd->range_state = RANGE_ACTIVE; + switch (cmd->a2->addr_type) + { + case ADDR_IS_REGEX: + /* Always include at least two lines. */ + return true; + case ADDR_IS_NUM: + /* Same handling as below, but always include at least one line. 
*/ + if (input->line_number >= cmd->a2->addr_number) + cmd->range_state = RANGE_CLOSED; + return (input->line_number <= cmd->a2->addr_number + || match_an_address_p (cmd->a1, input)); + case ADDR_IS_STEP: + cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; + return true; + case ADDR_IS_STEP_MOD: + cmd->a2->addr_number = input->line_number + cmd->a2->addr_step + - (input->line_number%cmd->a2->addr_step); + return true; + default: + break; + } + } + + /* cmd->range_state == RANGE_ACTIVE. Check if the range is + ending; also handle ADDR_IS_NUM specially in this case. */ + + if (cmd->a2->addr_type == ADDR_IS_NUM) + { + /* If the second address is a line number, and if we got past + that line, fail to match (it can happen when you jump + over such addresses with `b' and `t'. Use RANGE_CLOSED + so that the range is not re-enabled anymore. */ + if (input->line_number >= cmd->a2->addr_number) + cmd->range_state = RANGE_CLOSED; + + return (input->line_number <= cmd->a2->addr_number); + } + + /* Other addresses are treated as usual. */ + if (match_an_address_p (cmd->a2, input)) + cmd->range_state = RANGE_CLOSED; + + return true; +} + +static void +do_list (int line_len) +{ + unsigned char *p = (unsigned char *)line.active; + countT len = line.length; + countT width = 0; + char obuf[180]; /* just in case we encounter a 512-bit char (;-) */ + char *o; + size_t olen; + FILE *fp = output_file.fp; + + output_missing_newline (&output_file); + for (; len--; ++p) { + o = obuf; + + /* Some locales define 8-bit characters as printable. This makes the + testsuite fail at 8to7.sed because the `l' command in fact will not + convert the 8-bit characters. 
*/ +#if defined isascii || defined HAVE_ISASCII + if (isascii (*p) && ISPRINT (*p)) { +#else + if (ISPRINT (*p)) { +#endif + *o++ = *p; + if (*p == '\\') + *o++ = '\\'; + } else { + *o++ = '\\'; + switch (*p) { +#if defined __STDC__ && __STDC__-0 + case '\a': *o++ = 'a'; break; +#else /* Not STDC; we'll just assume ASCII */ + case 007: *o++ = 'a'; break; +#endif + case '\b': *o++ = 'b'; break; + case '\f': *o++ = 'f'; break; + case '\n': *o++ = 'n'; break; + case '\r': *o++ = 'r'; break; + case '\t': *o++ = 't'; break; + case '\v': *o++ = 'v'; break; + default: + sprintf (o, "%03o", *p); + o += strlen (o); + break; + } + } + olen = o - obuf; + if (width+olen >= line_len && line_len > 0) { + ck_fwrite ("\\", 1, 1, fp); + ck_fwrite (&buffer_delimiter, 1, 1, fp); + width = 0; + } + ck_fwrite (obuf, 1, olen, fp); + width += olen; + } + ck_fwrite ("$", 1, 1, fp); + ck_fwrite (&buffer_delimiter, 1, 1, fp); + flush_output (fp); +} + + +static void append_replacement (struct line *buf, struct replacement *p, + struct re_registers *regs) +{ + enum replacement_types repl_mod = 0; + + for (; p; p=p->next) + { + int i = p->subst_id; + enum replacement_types curr_type; + + /* Apply a \[lu] modifier that was given earlier, but which we + have not had yet the occasion to apply. But don't do it + if this replacement has a modifier of its own. */ + curr_type = (p->repl_type & REPL_MODIFIERS) + ? p->repl_type + : p->repl_type | repl_mod; + + repl_mod = 0; + if (p->prefix_length) + { + str_append_modified (buf, p->prefix, p->prefix_length, + curr_type); + curr_type &= ~REPL_MODIFIERS; + } + + if (0 <= i && i < regs->num_regs) + { + if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS) + /* Save this modifier, we shall apply it later. + e.g. 
in s/()([a-z])/\u\1\2/ + the \u modifier is applied to \2, not \1 */ + repl_mod = curr_type & REPL_MODIFIERS; + + else if (regs->end[i] != regs->start[i]) + str_append_modified (buf, line.active + regs->start[i], + (size_t)(regs->end[i] - regs->start[i]), + curr_type); + } + } +} + +static void +do_subst (struct subst *sub) +{ + size_t start = 0; /* where to start scan for (next) match in LINE */ + size_t last_end = 0; /* where did the last successful match end in LINE */ + countT count = 0; /* number of matches found */ + bool again = true; + + static struct re_registers regs; + + line_reset (&s_accum, &line); + + /* The first part of the loop optimizes s/xxx// when xxx is at the + start, and s/xxx$// */ + if (!match_regex (sub->regx, line.active, line.length, start, + ®s, sub->max_id + 1)) + return; + + if (debug) + { + if (regs.num_regs>0 && regs.start[0] != -1) + puts ("MATCHED REGEX REGISTERS"); + + for (int i = 0; i < regs.num_regs; ++i) + { + if (regs.start[i] == -1) + break; + + printf (" regex[%d] = %d-%d '", i, + (int)regs.start[i], (int)regs.end[i]); + + if (regs.start[i] != regs.end[i]) + fwrite (line.active + regs.start[i], regs.end[i] -regs.start[i], + 1, stdout); + + puts ("'"); + } + } + + if (!sub->replacement && sub->numb <= 1) + { + if (regs.start[0] == 0 && !sub->global) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + line.active += regs.end[0]; + line.length -= regs.end[0]; + line.alloc -= regs.end[0]; + goto post_subst; + } + else if (regs.end[0] == line.length) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + line.length = regs.start[0]; + goto post_subst; + } + } + + do + { + size_t offset = regs.start[0]; + size_t matched = regs.end[0] - regs.start[0]; + + /* Copy stuff to the left of this match into the output string. 
*/ + if (start < offset) + { + str_append (&s_accum, line.active + start, offset - start); + start = offset; + } + + /* If we're counting up to the Nth match, are we there yet? + And even if we are there, there is another case we have to + skip: are we matching an empty string immediately following + another match? + + This latter case avoids that baaaac, when passed through + s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is + unacceptable because it is not consistently applied (for + example, `baaaa' gives `xbx', not `xbxx'). */ + if ((matched > 0 || count == 0 || offset > last_end) + && ++count >= sub->numb) + { + /* We found a match, set the `replaced' flag. */ + replaced = true; + + /* Now expand the replacement string into the output string. */ + append_replacement (&s_accum, sub->replacement, ®s); + again = sub->global; + } + else + { + /* The match was not replaced. Copy the text until its + end; if it was vacuous, skip over one character and + add that character to the output. */ + if (matched == 0) + { + if (start < line.length) + matched = 1; + else + break; + } + + str_append (&s_accum, line.active + offset, matched); + } + + /* Start after the match. last_end is the real end of the matched + substring, excluding characters that were skipped in case the RE + matched the empty string. */ + start = offset + matched; + last_end = regs.end[0]; + } + while (again + && start <= line.length + && match_regex (sub->regx, line.active, line.length, start, + ®s, sub->max_id + 1)); + + /* Copy stuff to the right of the last match into the output string. */ + if (start < line.length) + str_append (&s_accum, line.active + start, line.length-start); + s_accum.chomped = line.chomped; + + /* Exchange line and s_accum. This can be much cheaper + than copying s_accum.active into line.text (for huge lines). */ + line_exchange (&line, &s_accum, false); + + /* Finish up. 
*/ + if (count < sub->numb) + return; + + post_subst: + if (sub->print & 1) + output_line (line.active, line.length, line.chomped, &output_file); + + if (sub->eval) + { +#ifdef HAVE_POPEN + FILE *pipe_fp; + line_reset (&s_accum, NULL); + + str_append (&line, "", 1); + pipe_fp = popen (line.active, "r"); + + if (pipe_fp != NULL) + { + while (!feof (pipe_fp)) + { + char buf[4096]; + int n = fread (buf, sizeof (char), 4096, pipe_fp); + if (n > 0) + str_append (&s_accum, buf, n); + } + + pclose (pipe_fp); + + /* Exchange line and s_accum. This can be much cheaper than copying + s_accum.active into line.text (for huge lines). See comment above + for 'g' as to while the third argument is incorrect anyway. */ + line_exchange (&line, &s_accum, true); + if (line.length + && line.active[line.length - 1] == buffer_delimiter) + line.length--; + } + else + panic (_("error in subprocess")); +#else + panic (_("option `e' not supported")); +#endif + } + + if (sub->print & 2) + output_line (line.active, line.length, line.chomped, &output_file); + if (sub->outf) + output_line (line.active, line.length, line.chomped, sub->outf); +} + +/* Translate the global input LINE via TRANS. + This function handles the multi-byte case. */ +static void +translate_mb (char *const *trans) +{ + size_t idx; /* index in the input line. */ + mbstate_t mbstate = { 0, }; + for (idx = 0; idx < line.length;) + { + unsigned int i; + size_t mbclen = MBRLEN (line.active + idx, + line.length - idx, &mbstate); + /* An invalid sequence, or a truncated multibyte + character. Treat it as a single-byte character. */ + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) + mbclen = 1; + + /* `i' indicate i-th translate pair. */ + for (i = 0; trans[2*i] != NULL; i++) + { + if (STREQ_LEN (line.active + idx, trans[2*i], mbclen)) + { + bool move_remain_buffer = false; + const char *tr = trans[2*i+1]; + size_t trans_len = *tr == '\0' ? 
1 : strlen (tr); + + if (mbclen < trans_len) + { + size_t new_len = (line.length + 1 + + trans_len - mbclen); + /* We must extend the line buffer. */ + if (line.alloc < new_len) + { + /* And we must resize the buffer. */ + resize_line (&line, new_len); + } + move_remain_buffer = true; + } + else if (mbclen > trans_len) + { + /* We must truncate the line buffer. */ + move_remain_buffer = true; + } + size_t prev_idx = idx; + if (move_remain_buffer) + { + /* Move the remaining with \0. */ + char const *move_from = (line.active + idx + mbclen); + char *move_to = line.active + idx + trans_len; + size_t move_len = line.length + 1 - idx - mbclen; + size_t move_offset = trans_len - mbclen; + memmove (move_to, move_from, move_len); + line.length += move_offset; + idx += move_offset; + } + memcpy (line.active + prev_idx, trans[2*i+1], + trans_len); + break; + } + } + idx += mbclen; + } +} + +static void +debug_print_end_of_cycle (void) +{ + puts ("END-OF-CYCLE:"); +} + +static void +debug_print_input (const struct input *input) +{ + bool is_stdin = (input->fp && fileno (input->fp) == 0); + + printf ("INPUT: '%s' line %lu\n", + is_stdin?"STDIN":input->in_file_name, + input->line_number); +} + +static void +debug_print_line (struct line *ln) +{ + const char *src = ln->active ? ln->active : ln->text; + size_t l = ln->length; + const char *p = src; + + fputs ( (ln == &hold) ? "HOLD: ":"PATTERN: ", stdout); + while (l--) + debug_print_char (*p++); + putchar ('\n'); +} + +/* Execute the program `vec' on the current input line. + Return exit status if caller should quit, -1 otherwise. 
*/ +static int +execute_program (struct vector *vec, struct input *input) +{ + struct sed_cmd *cur_cmd; + struct sed_cmd *end_cmd; + + cur_cmd = vec->v; + end_cmd = vec->v + vec->v_length; + while (cur_cmd < end_cmd) + { + if (debug) + { + fputs ("COMMAND: ", stdout); + debug_print_command (vec, cur_cmd); + } + + if (match_address_p (cur_cmd, input) != cur_cmd->addr_bang) + { + switch (cur_cmd->cmd) + { + case 'a': + { + struct append_queue *aq = next_append_slot (); + aq->text = cur_cmd->x.cmd_txt.text; + aq->textlen = cur_cmd->x.cmd_txt.text_length; + } + break; + + case '{': + case 'b': + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + + case '}': + case '#': + case ':': + /* Executing labels and block-ends are easy. */ + break; + + case 'c': + if (cur_cmd->range_state != RANGE_ACTIVE) + output_line (cur_cmd->x.cmd_txt.text, + cur_cmd->x.cmd_txt.text_length - 1, true, + &output_file); + /* POSIX.2 is silent about c starting a new cycle, + but it seems to be expected (and make sense). 
*/ + FALLTHROUGH; + case 'd': + if (debug) + debug_print_end_of_cycle (); + return -1; + + case 'D': + { + char *p = memchr (line.active, buffer_delimiter, line.length); + if (!p) + return -1; + + ++p; + line.alloc -= p - line.active; + line.length -= p - line.active; + line.active += p - line.active; + + /* reset to start next cycle without reading a new line: */ + cur_cmd = vec->v; + + if (debug) + debug_print_line (&line); + continue; + } + + case 'e': { +#ifndef HAVE_POPEN + panic (_("`e' command not supported")); +#else + FILE *pipe_fp; + int cmd_length = cur_cmd->x.cmd_txt.text_length; + line_reset (&s_accum, NULL); + + if (!cmd_length) + { + str_append (&line, "", 1); + pipe_fp = popen (line.active, "r"); + } + else + { + cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0; + pipe_fp = popen (cur_cmd->x.cmd_txt.text, "r"); + output_missing_newline (&output_file); + } + + if (pipe_fp == NULL) + panic (_("error in subprocess")); + + { + char buf[4096]; + int n; + while (!feof (pipe_fp)) + if ((n = fread (buf, sizeof (char), 4096, pipe_fp)) > 0) + { + if (!cmd_length) + str_append (&s_accum, buf, n); + else + ck_fwrite (buf, 1, n, output_file.fp); + } + + pclose (pipe_fp); + if (!cmd_length) + { + /* Store into pattern space for plain `e' commands */ + if (s_accum.length + && (s_accum.active[s_accum.length - 1] + == buffer_delimiter)) + s_accum.length--; + + /* Exchange line and s_accum. This can be much + cheaper than copying s_accum.active into line.text + (for huge lines). See comment above for 'g' as + to while the third argument is incorrect anyway. */ + line_exchange (&line, &s_accum, true); + } + else + flush_output (output_file.fp); + } +#endif + break; + } + + case 'g': + /* We do not have a really good choice for the third parameter. 
+ The problem is that hold space and the input file might as + well have different states; copying it from hold space means + that subsequent input might be read incorrectly, while + keeping it as in pattern space means that commands operating + on the moved buffer might consider a wrong character set. + We keep it true because it's what sed <= 4.1.5 did. */ + line_copy (&hold, &line, true); + if (debug) + debug_print_line (&hold); + break; + + case 'G': + /* We do not have a really good choice for the third parameter. + The problem is that hold space and pattern space might as + well have different states. So, true is as wrong as false. + We keep it true because it's what sed <= 4.1.5 did, but + we could consider having line_ap. */ + line_append (&hold, &line, true); + if (debug) + debug_print_line (&line); + break; + + case 'h': + /* Here, it is ok to have true. */ + line_copy (&line, &hold, true); + if (debug) + debug_print_line (&hold); + break; + + case 'H': + /* See comment above for 'G' regarding the third parameter. */ + line_append (&line, &hold, true); + if (debug) + debug_print_line (&hold); + break; + + case 'i': + output_line (cur_cmd->x.cmd_txt.text, + cur_cmd->x.cmd_txt.text_length - 1, + true, &output_file); + break; + + case 'l': + do_list (cur_cmd->x.int_arg == -1 + ? 
lcmd_out_line_len + : cur_cmd->x.int_arg); + break; + + case 'n': + if (!no_default_output) + output_line (line.active, line.length, line.chomped, + &output_file); + if (test_eof (input) || !read_pattern_space (input, vec, false)) + { + if (debug) + debug_print_end_of_cycle (); + return -1; + } + + if (debug) + debug_print_line (&line); + break; + + case 'N': + str_append (&line, &buffer_delimiter, 1); + + if (test_eof (input) || !read_pattern_space (input, vec, true)) + { + if (debug) + debug_print_end_of_cycle (); + line.length--; + if (posixicity == POSIXLY_EXTENDED && !no_default_output) + output_line (line.active, line.length, line.chomped, + &output_file); + return -1; + } + if (debug) + debug_print_line (&line); + break; + + case 'p': + output_line (line.active, line.length, line.chomped, + &output_file); + break; + + case 'P': + { + char *p = memchr (line.active, buffer_delimiter, line.length); + output_line (line.active, p ? p - line.active : line.length, + p ? true : line.chomped, &output_file); + } + break; + + case 'q': + if (!no_default_output) + output_line (line.active, line.length, line.chomped, + &output_file); + dump_append_queue (); + FALLTHROUGH; + + case 'Q': + return cur_cmd->x.int_arg == -1 ? 
0 : cur_cmd->x.int_arg; + + case 'r': + if (cur_cmd->x.fname) + { + struct append_queue *aq = next_append_slot (); + aq->fname = cur_cmd->x.fname; + } + break; + + case 'R': + if (cur_cmd->x.inf->fp && !feof (cur_cmd->x.inf->fp)) + { + struct append_queue *aq; + size_t buflen; + char *text = NULL; + int result; + + result = ck_getdelim (&text, &buflen, buffer_delimiter, + cur_cmd->x.inf->fp); + if (result != EOF) + { + aq = next_append_slot (); + aq->free = true; + aq->text = text; + aq->textlen = result; + } + else + { + /* The external input file (for R command) reached EOF, + the 'text' buffer will not be added to the append queue + so release it */ + free (text); + } + } + break; + + case 's': + do_subst (cur_cmd->x.cmd_subst); + if (debug) + debug_print_line (&line); + break; + + case 't': + if (replaced) + { + replaced = false; + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + } + break; + + case 'T': + if (!replaced) + { + cur_cmd = vec->v + cur_cmd->x.jump_index; + continue; + } + else + replaced = false; + break; + + case 'w': + if (cur_cmd->x.outf->fp) + output_line (line.active, line.length, + line.chomped, cur_cmd->x.outf); + break; + + case 'W': + if (cur_cmd->x.outf->fp) + { + char *p = memchr (line.active, buffer_delimiter, line.length); + output_line (line.active, p ? p - line.active : line.length, + p ? true : line.chomped, cur_cmd->x.outf); + } + break; + + case 'x': + /* See comment above for 'g' regarding the third parameter. 
*/ + line_exchange (&line, &hold, false); + if (debug) + { + debug_print_line (&line); + debug_print_line (&hold); + } + break; + + case 'y': + if (mb_cur_max > 1) + translate_mb (cur_cmd->x.translatemb); + else + { + unsigned char *p, *e; + p = (unsigned char *)line.active; + for (e=p+line.length; p<e; ++p) + *p = cur_cmd->x.translate[*p]; + } + if (debug) + debug_print_line (&line); + break; + + case 'z': + line.length = 0; + if (debug) + debug_print_line (&line); + break; + + case '=': + output_missing_newline (&output_file); + fprintf (output_file.fp, "%lu%c", + (unsigned long)input->line_number, + buffer_delimiter); + flush_output (output_file.fp); + break; + + case 'F': + output_missing_newline (&output_file); + fprintf (output_file.fp, "%s%c", + input->in_file_name, + buffer_delimiter); + flush_output (output_file.fp); + break; + + default: + panic ("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd); + } + } + + /* this is buried down here so that a "continue" statement can skip it */ + ++cur_cmd; + } + + if (debug) + debug_print_end_of_cycle (); + if (!no_default_output) + output_line (line.active, line.length, line.chomped, &output_file); + return -1; +} + + +/* Apply the compiled script to all the named files. 
*/ +int +process_files (struct vector *the_program, char **argv) +{ + static char dash[] = "-"; + static char *stdin_argv[2] = { dash, NULL }; + struct input input; + int status; + + line_init (&line, NULL, INITIAL_BUFFER_SIZE); + line_init (&hold, NULL, 0); + line_init (&buffer, NULL, 0); + + input.reset_at_next_file = true; + if (argv && *argv) + input.file_list = argv; + else if (in_place_extension) + panic (_("no input files")); + else + input.file_list = stdin_argv; + + input.bad_count = 0; + input.line_number = 0; + input.read_fn = read_always_fail; + input.fp = NULL; + + status = EXIT_SUCCESS; + while (read_pattern_space (&input, the_program, false)) + { + if (debug) + { + debug_print_input (&input); + debug_print_line (&line); + } + + status = execute_program (the_program, &input); + if (status == -1) + status = EXIT_SUCCESS; + else + break; + } + closedown (&input); + +#ifdef lint + /* We're about to exit, so these free()s are redundant. + But if we're running under a memory-leak detecting + implementation of malloc(), we want to explicitly + deallocate in order to avoid extraneous noise from + the allocator. */ + release_append_queue (); + free (buffer.text); + free (hold.text); + free (line.text); + free (s_accum.text); +#endif /* lint */ + + if (input.bad_count) + status = EXIT_BAD_INPUT; + + return status; +} diff --git a/sed/local.mk b/sed/local.mk new file mode 100644 index 0000000..5ccaaf4 --- /dev/null +++ b/sed/local.mk @@ -0,0 +1,58 @@ +# Copyright (C) 2016-2018 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +bin_PROGRAMS += sed/sed + +localedir = $(datadir)/locale + +sed_sed_SOURCES = \ + sed/compile.c \ + sed/debug.c \ + sed/execute.c \ + sed/mbcs.c \ + sed/regexp.c \ + sed/sed.c \ + sed/utils.c + +noinst_HEADERS += \ + sed/sed.h \ + sed/utils.h + +sed_sed_CPPFLAGS = $(AM_CPPFLAGS) -DLOCALEDIR=\"$(localedir)\" +sed_sed_CFLAGS = $(AM_CFLAGS) $(WARN_CFLAGS) $(WERROR_CFLAGS) +sed_sed_LDADD = sed/libver.a lib/libsed.a $(INTLLIBS) $(LIB_ACL) $(LIB_SELINUX) +sed_sed_DEPENDENCIES = lib/libsed.a sed/libver.a + +$(sed_sed_OBJECTS): $(BUILT_SOURCES) + +BUILT_SOURCES += sed/version.c +DISTCLEANFILES += sed/version.c +sed/version.c: Makefile + $(AM_V_GEN)rm -f $@ + $(AM_V_at)printf '#include <config.h>\n' > $@t + $(AM_V_at)printf 'char const *Version = "$(PACKAGE_VERSION)";\n' >> $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +BUILT_SOURCES += sed/version.h +DISTCLEANFILES += sed/version.h +sed/version.h: Makefile + $(AM_V_GEN)rm -f $@ + $(AM_V_at)printf 'extern char const *Version;\n' > $@t + $(AM_V_at)chmod a-w $@t + $(AM_V_at)mv $@t $@ + +noinst_LIBRARIES += sed/libver.a +nodist_sed_libver_a_SOURCES = sed/version.c sed/version.h diff --git a/sed/mbcs.c b/sed/mbcs.c new file mode 100644 index 0000000..c94b219 --- /dev/null +++ b/sed/mbcs.c @@ -0,0 +1,76 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 2003-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#include "sed.h" +#include <stdlib.h> +#include <string.h> + +#include "localcharset.h" + +int mb_cur_max; +bool is_utf8; + +/* Return non-zero if CH is part of a valid multibyte sequence: + Either incomplete yet valid sequence (in case of a leading byte), + or the last byte of a valid multibyte sequence. + + Return zero in all other cases: + CH is a valid single-byte character (e.g. 0x01-0x7F in UTF-8 locales); + CH is an invalid byte in a multibyte sequence for the currentl locale, + CH is the NUL byte. + + Reset CUR_STAT in the case of an invalid byte. +*/ +int +is_mb_char (int ch, mbstate_t *cur_stat) +{ + const char c = ch ; + const int mb_pending = !mbsinit (cur_stat); + const int result = mbrtowc (NULL, &c, 1, cur_stat); + + switch (result) + { + case -2: /* Beginning or middle of valid multibyte sequence */ + return 1; + + case -1: /* Invalid sequence, byte treated like a single-byte character */ + memset (cur_stat, 0, sizeof (mbstate_t)); + return 0; + + case 1: /* A valid byte, check if part of on-going multibyte sequence */ + return mb_pending; + + case 0: /* Special case of mbrtowc(3): the NUL character */ + /* TODO: test this */ + return 1; + + default: /* Should never happen, as per mbrtowc(3) documentation */ + panic ("is_mb_char: mbrtowc (0x%x) returned %d", + (unsigned int) ch, result); + } +} + +void +initialize_mbcs (void) +{ + /* For UTF-8, we know that the encoding is stateless. */ + const char *codeset_name; + + codeset_name = locale_charset (); + is_utf8 = (strcmp (codeset_name, "UTF-8") == 0); + + mb_cur_max = MB_CUR_MAX; +} diff --git a/sed/regexp.c b/sed/regexp.c new file mode 100644 index 0000000..4ac06d6 --- /dev/null +++ b/sed/regexp.c @@ -0,0 +1,379 @@ +/* GNU SED, a batch stream editor. 
+ Copyright (C) 1999-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#include "sed.h" + +#include <ctype.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "xalloc.h" + +#ifdef gettext_noop +# define N_(String) gettext_noop(String) +#else +# define N_(String) (String) +#endif + +extern bool use_extended_syntax_p; + +static const char errors[] = + "no previous regular expression\0" + "cannot specify modifiers on empty regexp"; + +#define NO_REGEX (errors) +#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression"))) + + +void +dfaerror (char const *mesg) +{ + panic ("%s", mesg); +} + +void +dfawarn (char const *mesg) +{ + if (!getenv ("POSIXLY_CORRECT")) + dfaerror (mesg); +} + + +static void +compile_regex_1 (struct regex *new_regex, int needed_sub) +{ + const char *error; + int syntax = ((extended_regexp_flags & REG_EXTENDED) + ? 
RE_SYNTAX_POSIX_EXTENDED + : RE_SYNTAX_POSIX_BASIC); + + syntax &= ~RE_DOT_NOT_NULL; + syntax |= RE_NO_POSIX_BACKTRACKING; + + switch (posixicity) + { + case POSIXLY_EXTENDED: + syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD; + break; + case POSIXLY_CORRECT: + syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD; + break; + case POSIXLY_BASIC: + syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS; + if (!(extended_regexp_flags & REG_EXTENDED)) + syntax |= RE_LIMITED_OPS; + break; + } + + if (new_regex->flags & REG_ICASE) + syntax |= RE_ICASE; + else + new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8)); + syntax |= needed_sub ? 0 : RE_NO_SUB; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (new_regex->flags & REG_NEWLINE) + { + /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + } + + re_set_syntax (syntax); + error = re_compile_pattern (new_regex->re, new_regex->sz, + &new_regex->pattern); + new_regex->pattern.newline_anchor = + buffer_delimiter == '\n' && (new_regex->flags & REG_NEWLINE) != 0; + + new_regex->pattern.translate = NULL; +#ifndef RE_ICASE + if (new_regex->flags & REG_ICASE) + { + static char translate[1 << (sizeof (char) * 8)]; + int i; + for (i = 0; i < sizeof (translate) / sizeof (char); i++) + translate[i] = tolower (i); + + new_regex->pattern.translate = translate; + } +#endif + + if (error) + bad_prog (error); + + /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */ + if (needed_sub + && new_regex->pattern.re_nsub < needed_sub - 1 + && posixicity == POSIXLY_EXTENDED) + { + char buf[200]; + sprintf (buf, _("invalid reference \\%d on `s' command's RHS"), + needed_sub - 1); + bad_prog (buf); + } + + int dfaopts = buffer_delimiter == '\n' ? 
0 : DFA_EOL_NUL; + new_regex->dfa = dfaalloc (); + dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts); + dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1); + + /* The patterns which consist of only ^ or $ often appear in + substitution, but regex and dfa are not good at them, as regex does + not build fastmap, and as all in buffer must be scanned for $. So + we mark them to handle manually. */ + if (new_regex->sz == 1) + { + if (new_regex->re[0] == '^') + new_regex->begline = true; + if (new_regex->re[0] == '$') + new_regex->endline = true; + } +} + +struct regex * +compile_regex (struct buffer *b, int flags, int needed_sub) +{ + struct regex *new_regex; + size_t re_len; + + /* // matches the last RE */ + if (size_buffer (b) == 0) + { + if (flags > 0) + bad_prog (_(BAD_MODIF)); + return NULL; + } + + re_len = size_buffer (b); + new_regex = xzalloc (sizeof (struct regex) + re_len - 1); + new_regex->flags = flags; + memcpy (new_regex->re, get_buffer (b), re_len); + + /* GNU regex does not process \t & co. */ + new_regex->sz = normalize_text (new_regex->re, re_len, TEXT_REGEX); + + compile_regex_1 (new_regex, needed_sub); + return new_regex; +} + +int +match_regex (struct regex *regex, char *buf, size_t buflen, + size_t buf_start_offset, struct re_registers *regarray, + int regsize) +{ + int ret; + static struct regex *regex_last; + + /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */ + + /* Keep track of the last regexp matched. */ + if (!regex) + { + regex = regex_last; + if (!regex_last) + bad_prog (_(NO_REGEX)); + } + else + regex_last = regex; + + /* gnulib's re_search uses signed-int as length */ + if (buflen >= INT_MAX) + panic (_("regex input buffer length larger than INT_MAX")); + + if (regex->pattern.no_sub && regsize) + { + /* Re-compiling an existing regex, free the previously allocated + structures. 
*/ + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } + regfree (&regex->pattern); + + compile_regex_1 (regex, regsize); + } + + regex->pattern.regs_allocated = REGS_REALLOCATE; + + /* Optimized handling for '^' and '$' patterns */ + if (regex->begline || regex->endline) + { + size_t offset; + + if (regex->endline) + { + const char *p = NULL; + + if (regex->flags & REG_NEWLINE) + p = memchr (buf + buf_start_offset, buffer_delimiter, + buflen - buf_start_offset); + + offset = p ? p - buf : buflen; + } + else if (buf_start_offset == 0) + /* begline anchor, starting at beginning of the buffer. */ + offset = 0; + else if (!(regex->flags & REG_NEWLINE)) + /* begline anchor, starting in the middle of the text buffer, + and multiline regex is not specified - will never match. + Example: seq 2 | sed 'N;s/^/X/g' */ + return 0; + else if (buf[buf_start_offset - 1] == buffer_delimiter) + /* begline anchor, starting in the middle of the text buffer, + with multiline match, and the current character + is the line delimiter - start here. + Example: seq 2 | sed 'N;s/^/X/mg' */ + offset = buf_start_offset; + else + { + /* begline anchor, starting in the middle of the search buffer, + all previous optimizations didn't work: search + for the next line delimiter character in the buffer, + and start from there if found. 
*/ + const char *p = memchr (buf + buf_start_offset, buffer_delimiter, + buflen - buf_start_offset); + + if (p == NULL) + return 0; + + offset = p - buf + 1; + } + + if (regsize) + { + size_t i; + + if (!regarray->start) + { + regarray->start = XCALLOC (1, regoff_t); + regarray->end = XCALLOC (1, regoff_t); + regarray->num_regs = 1; + } + + regarray->start[0] = offset; + regarray->end[0] = offset; + + for (i = 1 ; i < regarray->num_regs; ++i) + regarray->start[i] = regarray->end[i] = -1; + } + + return 1; + } + + if (buf_start_offset == 0) + { + struct dfa *superset = dfasuperset (regex->dfa); + + if (superset && !dfaexec (superset, buf, buf + buflen, true, NULL, NULL)) + return 0; + + if ((!regsize && (regex->flags & REG_NEWLINE)) + || (!superset && dfaisfast (regex->dfa))) + { + bool backref = false; + + if (!dfaexec (regex->dfa, buf, buf + buflen, true, NULL, &backref)) + return 0; + + if (!regsize && (regex->flags & REG_NEWLINE) && !backref) + return 1; + } + } + + /* If the buffer delimiter is not newline character, we cannot use + newline_anchor flag of regex. So do it line-by-line, and add offset + value to results. */ + if ((regex->flags & REG_NEWLINE) && buffer_delimiter != '\n') + { + const char *beg, *end; + const char *start; + + beg = buf; + + if (buf_start_offset > 0) + { + const char *eol = memrchr (buf, buffer_delimiter, buf_start_offset); + + if (eol != NULL) + beg = eol + 1; + } + + start = buf + buf_start_offset; + + for (;;) + { + end = memchr (beg, buffer_delimiter, buf + buflen - beg); + + if (end == NULL) + end = buf + buflen; + + ret = re_search (&regex->pattern, beg, end - beg, + start - beg, end - start, + regsize ? 
regarray : NULL); + + if (ret > -1) + { + size_t i; + + ret += beg - buf; + + if (regsize) + { + for (i = 0; i < regarray->num_regs; ++i) + { + if (regarray->start[i] > -1) + regarray->start[i] += beg - buf; + if (regarray->end[i] > -1) + regarray->end[i] += beg - buf; + } + } + + break; + } + + if (end == buf + buflen) + break; + + beg = start = end + 1; + } + } + else + ret = re_search (&regex->pattern, buf, buflen, buf_start_offset, + buflen - buf_start_offset, + regsize ? regarray : NULL); + + return (ret > -1); +} + + +#ifdef lint +void +release_regex (struct regex *regex) +{ + if (regex->dfa) + { + dfafree (regex->dfa); + free (regex->dfa); + regex->dfa = NULL; + } + regfree (&regex->pattern); + free (regex); +} +#endif /* lint */ diff --git a/sed/sed.c b/sed/sed.c new file mode 100644 index 0000000..e588c56 --- /dev/null +++ b/sed/sed.c @@ -0,0 +1,388 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. 
*/ + + +#include "sed.h" + + +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include "binary-io.h" +#include "getopt.h" +#include "progname.h" +#include "version.h" +#include "xalloc.h" + +#include "version-etc.h" + +#define AUTHORS \ + _("Jay Fenlason"), \ + _("Tom Lord"), \ + _("Ken Pizzini"), \ + _("Paolo Bonzini"), \ + _("Jim Meyering"), \ + _("Assaf Gordon") + +int extended_regexp_flags = 0; + +/* one-byte buffer delimiter */ +char buffer_delimiter = '\n'; + +/* If set, fflush(stdout) on every line output. */ +bool unbuffered = false; + +/* If set, don't write out the line unless explicitly told to */ +bool no_default_output = false; + +/* If set, reset line counts on every new file. */ +bool separate_files = false; + +/* If set, follow symlinks when processing in place */ +bool follow_symlinks = false; + +/* If set, operate in 'sandbox' mode */ +bool sandbox = false; + +/* if set, print debugging information */ +bool debug = false; + +/* How do we edit files in-place? (we don't if NULL) */ +char *in_place_extension = NULL; + +/* The mode to use to read/write files, either "r"/"w" or "rb"/"wb". */ +char const *read_mode = "r"; +char const *write_mode = "w"; + +#if O_BINARY +/* Additional flag for binary mode on platforms with O_BINARY/O_TEXT. */ +bool binary_mode = false; +#endif + +/* Do we need to be pedantically POSIX compliant? */ +enum posixicity_types posixicity; + +/* How long should the `l' command's output line be? */ +countT lcmd_out_line_len = 70; + +/* The complete compiled SED program that we are going to run: */ +static struct vector *the_program = NULL; + +/* When we've created a temporary for an in-place update, + we may have to exit before the rename. This is the name + of the temporary that we'll have to unlink via an atexit- + registered cleanup function. 
*/ +static char const *G_file_to_unlink; + +struct localeinfo localeinfo; + +/* When exiting between temporary file creation and the rename + associated with a sed -i invocation, remove that file. */ +static void +cleanup (void) +{ + IF_LINT (free (in_place_extension)); + if (G_file_to_unlink) + unlink (G_file_to_unlink); +} + +/* Note that FILE must be removed upon exit. */ +void +register_cleanup_file (char const *file) +{ + G_file_to_unlink = file; +} + +/* Clear the global file-to-unlink global. */ +void +cancel_cleanup (void) +{ + G_file_to_unlink = NULL; +} + +static void usage (int); +static void +contact (int errmsg) +{ + FILE *out = errmsg ? stderr : stdout; + fprintf (out, _("GNU sed home page: <https://www.gnu.org/software/sed/>.\n\ +General help using GNU software: <https://www.gnu.org/gethelp/>.\n")); + + /* Only print the bug report address for `sed --help', otherwise we'll + get reports for other people's bugs. */ + if (!errmsg) + fprintf (out, _("E-mail bug reports to: <%s>.\n"), PACKAGE_BUGREPORT); +} + +_Noreturn static void +usage (int status) +{ + FILE *out = status ? stderr : stdout; + + fprintf (out, _("\ +Usage: %s [OPTION]... 
{script-only-if-no-other-script} [input-file]...\n\ +\n"), program_name); + + fprintf (out, _(" -n, --quiet, --silent\n\ + suppress automatic printing of pattern space\n")); + fprintf (out, _(" --debug\n\ + annotate program execution\n")); + fprintf (out, _(" -e script, --expression=script\n\ + add the script to the commands to be executed\n")); + fprintf (out, _(" -f script-file, --file=script-file\n\ + add the contents of script-file to the commands" \ + " to be executed\n")); +#ifdef ENABLE_FOLLOW_SYMLINKS + fprintf (out, _(" --follow-symlinks\n\ + follow symlinks when processing in place\n")); +#endif + fprintf (out, _(" -i[SUFFIX], --in-place[=SUFFIX]\n\ + edit files in place (makes backup if SUFFIX supplied)\n")); +#if O_BINARY + fprintf (out, _(" -b, --binary\n\ + open files in binary mode (CR+LFs are not" \ + " processed specially)\n")); +#endif + fprintf (out, _(" -l N, --line-length=N\n\ + specify the desired line-wrap length for the `l' command\n")); + fprintf (out, _(" --posix\n\ + disable all GNU extensions.\n")); + fprintf (out, _(" -E, -r, --regexp-extended\n\ + use extended regular expressions in the script\n\ + (for portability use POSIX -E).\n")); + fprintf (out, _(" -s, --separate\n\ + consider files as separate rather than as a single,\n\ + continuous long stream.\n")); + fprintf (out, _(" --sandbox\n\ + operate in sandbox mode (disable e/r/w commands).\n")); + fprintf (out, _(" -u, --unbuffered\n\ + load minimal amounts of data from the input files and flush\n\ + the output buffers more often\n")); + fprintf (out, _(" -z, --null-data\n\ + separate lines by NUL characters\n")); + fprintf (out, _(" --help display this help and exit\n")); + fprintf (out, _(" --version output version information and exit\n")); + fprintf (out, _("\n\ +If no -e, --expression, -f, or --file option is given, then the first\n\ +non-option argument is taken as the sed script to interpret. 
All\n\ +remaining arguments are names of input files; if no input files are\n\ +specified, then the standard input is read.\n\ +\n")); + contact (status); + + ck_fclose (NULL); + exit (status); +} + +int +main (int argc, char **argv) +{ +#define SHORTOPTS "bsnrzuEe:f:l:i::V:" + + enum { SANDBOX_OPTION = CHAR_MAX+1, + DEBUG_OPTION + }; + + static const struct option longopts[] = { + {"binary", 0, NULL, 'b'}, + {"regexp-extended", 0, NULL, 'r'}, + {"debug", 0, NULL, DEBUG_OPTION}, + {"expression", 1, NULL, 'e'}, + {"file", 1, NULL, 'f'}, + {"in-place", 2, NULL, 'i'}, + {"line-length", 1, NULL, 'l'}, + {"null-data", 0, NULL, 'z'}, + {"zero-terminated", 0, NULL, 'z'}, + {"quiet", 0, NULL, 'n'}, + {"posix", 0, NULL, 'p'}, + {"silent", 0, NULL, 'n'}, + {"sandbox", 0, NULL, SANDBOX_OPTION}, + {"separate", 0, NULL, 's'}, + {"unbuffered", 0, NULL, 'u'}, + {"version", 0, NULL, 'v'}, + {"help", 0, NULL, 'h'}, +#ifdef ENABLE_FOLLOW_SYMLINKS + {"follow-symlinks", 0, NULL, 'F'}, +#endif + {NULL, 0, NULL, 0} + }; + + int opt; + int return_code; + const char *cols = getenv ("COLS"); + + set_program_name (argv[0]); + initialize_main (&argc, &argv); +#if HAVE_SETLOCALE + /* Set locale according to user's wishes. */ + setlocale (LC_ALL, ""); +#endif + initialize_mbcs (); + init_localeinfo (&localeinfo); + + /* Arrange to remove any un-renamed temporary file, + upon premature exit. */ + atexit (cleanup); + +#if ENABLE_NLS + + /* Tell program which translations to use and where to find. */ + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); +#endif + + if (getenv ("POSIXLY_CORRECT") != NULL) + posixicity = POSIXLY_CORRECT; + else + posixicity = POSIXLY_EXTENDED; + + /* If environment variable `COLS' is set, use its value for + the baseline setting of `lcmd_out_line_len'. The "-1" + is to avoid gratuitous auto-line-wrap on ttys. 
+ */ + if (cols) + { + countT t = atoi (cols); + if (t > 1) + lcmd_out_line_len = t-1; + } + + while ((opt = getopt_long (argc, argv, SHORTOPTS, longopts, NULL)) != EOF) + { + switch (opt) + { + case 'n': + no_default_output = true; + break; + case 'e': + the_program = compile_string (the_program, optarg, strlen (optarg)); + break; + case 'f': + the_program = compile_file (the_program, optarg); + break; + + case 'z': + buffer_delimiter = 0; + break; + + case 'F': + follow_symlinks = true; + break; + + case 'i': + separate_files = true; + IF_LINT (free (in_place_extension)); + if (optarg == NULL) + /* use no backups */ + in_place_extension = xstrdup ("*"); + + else if (strchr (optarg, '*') != NULL) + in_place_extension = xstrdup (optarg); + + else + { + in_place_extension = XCALLOC (strlen (optarg) + 2, char); + in_place_extension[0] = '*'; + strcpy (in_place_extension + 1, optarg); + } + + break; + + case 'l': + lcmd_out_line_len = atoi (optarg); + break; + + case 'p': + posixicity = POSIXLY_BASIC; + break; + + case 'b': + read_mode = "rb"; + write_mode = "wb"; +#if O_BINARY + binary_mode = true; +#endif + break; + + case 'E': + case 'r': + extended_regexp_flags = REG_EXTENDED; + break; + + case 's': + separate_files = true; + break; + + case SANDBOX_OPTION: + sandbox = true; + break; + + case DEBUG_OPTION: + debug = true; + break; + + case 'u': + unbuffered = true; + break; + + case 'v': + version_etc (stdout, program_name, PACKAGE_NAME, Version, + AUTHORS, (char *) NULL); + contact (false); + ck_fclose (NULL); + exit (EXIT_SUCCESS); + case 'h': + usage (EXIT_SUCCESS); + default: + usage (EXIT_BAD_USAGE); + } + } + + if (!the_program) + { + if (optind < argc) + { + char *arg = argv[optind++]; + the_program = compile_string (the_program, arg, strlen (arg)); + } + else + usage (EXIT_BAD_USAGE); + } + check_final_program (the_program); + +#if O_BINARY + if (binary_mode) + { + if (set_binary_mode ( fileno (stdin), O_BINARY) == -1) + panic (_("failed to set binary mode 
on STDIN")); + if (set_binary_mode ( fileno (stdout), O_BINARY) == -1) + panic (_("failed to set binary mode on STDOUT")); + } +#endif + + if (debug) + debug_print_program (the_program); + + return_code = process_files (the_program, argv+optind); + + finish_program (the_program); + ck_fclose (NULL); + + return return_code; +} diff --git a/sed/sed.h b/sed/sed.h new file mode 100644 index 0000000..cb28736 --- /dev/null +++ b/sed/sed.h @@ -0,0 +1,293 @@ +/* GNU SED, a batch stream editor. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> +#include "basicdefs.h" +#include "dfa.h" +#include "localeinfo.h" +#include "regex.h" +#include <stdio.h> +#include "unlocked-io.h" + +#include "utils.h" + +/* Struct vector is used to describe a compiled sed program. */ +struct vector { + struct sed_cmd *v; /* a dynamically allocated array */ + size_t v_allocated; /* ... number of slots allocated */ + size_t v_length; /* ... number of slots in use */ +}; + +/* This structure tracks files used by sed so that they may all be + closed cleanly at normal program termination. A flag is kept that tells + if a missing newline was encountered, so that it is added on the + next line and the two lines are not concatenated. 
*/ +struct output { + char *name; + bool missing_newline; + FILE *fp; + struct output *link; +}; + +struct text_buf { + char *text; + size_t text_length; +}; + +struct regex { + regex_t pattern; + int flags; + size_t sz; + struct dfa *dfa; + bool begline; + bool endline; + char re[1]; +}; + +enum replacement_types { + REPL_ASIS = 0, + REPL_UPPERCASE = 1, + REPL_LOWERCASE = 2, + REPL_UPPERCASE_FIRST = 4, + REPL_LOWERCASE_FIRST = 8, + REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST, + + /* These are given to aid in debugging */ + REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE, + REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE, + REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE, + REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE +}; + +enum text_types { + TEXT_BUFFER, + TEXT_REPLACEMENT, + TEXT_REGEX +}; + +enum posixicity_types { + POSIXLY_EXTENDED, /* with GNU extensions */ + POSIXLY_CORRECT, /* with POSIX-compatible GNU extensions */ + POSIXLY_BASIC /* pedantically POSIX */ +}; + +enum addr_state { + RANGE_INACTIVE, /* never been active */ + RANGE_ACTIVE, /* between first and second address */ + RANGE_CLOSED /* like RANGE_INACTIVE, but range has ended once */ +}; + +enum addr_types { + ADDR_IS_NULL, /* null address */ + ADDR_IS_REGEX, /* a.addr_regex is valid */ + ADDR_IS_NUM, /* a.addr_number is valid */ + ADDR_IS_NUM_MOD, /* a.addr_number is valid, addr_step is modulo */ + ADDR_IS_STEP, /* address is +N (only valid for addr2) */ + ADDR_IS_STEP_MOD, /* address is ~N (only valid for addr2) */ + ADDR_IS_LAST /* address is $ */ +}; + +struct addr { + enum addr_types addr_type; + countT addr_number; + countT addr_step; + struct regex *addr_regex; +}; + + +struct replacement { + char *prefix; + size_t prefix_length; + int subst_id; + enum replacement_types repl_type; + struct replacement *next; +}; + +struct subst { + struct regex *regx; + struct replacement *replacement; + countT numb; /* 
if >0, only substitute for match number "numb" */ + struct output *outf; /* 'w' option given */ + unsigned global : 1; /* 'g' option given */ + unsigned print : 2; /* 'p' option given (before/after eval) */ + unsigned eval : 1; /* 'e' option given */ + unsigned max_id : 4; /* maximum backreference on the RHS */ +#ifdef lint + char* replacement_buffer; +#endif +}; + + + + +struct sed_cmd { + struct addr *a1; /* save space: usually is NULL */ + struct addr *a2; + + /* See description the enum, above. */ + enum addr_state range_state; + + /* Non-zero if command is to be applied to non-matches. */ + char addr_bang; + + /* The actual command character. */ + char cmd; + + /* auxiliary data for various commands */ + union { + /* This structure is used for a, i, and c commands. */ + struct text_buf cmd_txt; + + /* This is used for the l, q and Q commands. */ + int int_arg; + + /* This is used for the {}, b, and t commands. */ + countT jump_index; + + /* This is used for the r command. */ + char *fname; + + /* This is used for the hairy s command. */ + struct subst *cmd_subst; + + /* This is used for the w command. */ + struct output *outf; + + /* This is used for the R command. + (despite the struct name, it is used for both in and out files). */ + struct output *inf; + + /* This is used for the y command. */ + unsigned char *translate; + char **translatemb; + + /* This is used for the ':' command (debug only). 
*/ + char* label_name; + } x; +}; + + +_Noreturn void bad_prog (const char *why); +size_t normalize_text (char *text, size_t len, enum text_types buftype); +struct vector *compile_string (struct vector *, char *str, size_t len); +struct vector *compile_file (struct vector *, const char *cmdfile); +void check_final_program (struct vector *); +void rewind_read_files (void); +void finish_program (struct vector *); + +struct regex *compile_regex (struct buffer *b, int flags, int needed_sub); +int match_regex (struct regex *regex, + char *buf, size_t buflen, size_t buf_start_offset, + struct re_registers *regarray, int regsize); +#ifdef lint +void release_regex (struct regex *); +#endif + +void +debug_print_command (const struct vector *program, const struct sed_cmd *sc); +void +debug_print_program (const struct vector *program); +void +debug_print_char (char c); + +int process_files (struct vector *, char **argv); + +int main (int, char **); + +extern struct localeinfo localeinfo; + +extern int extended_regexp_flags; + +/* one-byte buffer delimiter */ +extern char buffer_delimiter; + +/* If set, fflush(stdout) on every line output, + and turn off stream buffering on inputs. */ +extern bool unbuffered; + +/* If set, don't write out the line unless explicitly told to. */ +extern bool no_default_output; + +/* If set, reset line counts on every new file. */ +extern bool separate_files; + +/* If set, follow symlinks when invoked with -i option */ +extern bool follow_symlinks; + +/* Do we need to be pedantically POSIX compliant? */ +extern enum posixicity_types posixicity; + +/* How long should the `l' command's output line be? */ +extern countT lcmd_out_line_len; + +/* How do we edit files in-place? (we don't if NULL) */ +extern char *in_place_extension; + +/* The mode to use to read and write files, either "rt"/"w" or "rb"/"wb". */ +extern char const *read_mode; +extern char const *write_mode; + +/* Should we use EREs? 
*/ +extern bool use_extended_syntax_p; + +/* Declarations for multibyte character sets. */ +extern int mb_cur_max; +extern bool is_utf8; + +/* If set, operate in 'sandbox' mode - disable e/r/w commands */ +extern bool sandbox; + +/* If set, print debugging information. */ +extern bool debug; + +#define MBRTOWC(pwc, s, n, ps) \ + (mb_cur_max == 1 ? \ + (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \ + mbrtowc ((pwc), (s), (n), (ps))) + +#define WCRTOMB(s, wc, ps) \ + (mb_cur_max == 1 ? \ + (*(s) = wctob ((wint_t) (wc)), 1) : \ + wcrtomb ((s), (wc), (ps))) + +#define MBSINIT(s) \ + (mb_cur_max == 1 ? 1 : mbsinit ((s))) + +#define MBRLEN(s, n, ps) \ + (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps)) + +#define IS_MB_CHAR(ch, ps) \ + (mb_cur_max == 1 ? 0 : is_mb_char (ch, ps)) + +extern int is_mb_char (int ch, mbstate_t *ps); +extern void initialize_mbcs (void); +extern void register_cleanup_file (char const *file); +extern void cancel_cleanup (void); + +/* Use this to suppress gcc's '...may be used before initialized' warnings. */ +#ifdef lint +# define IF_LINT(Code) Code +#else +# define IF_LINT(Code) /* empty */ +#endif + +#ifndef FALLTHROUGH +# if __GNUC__ < 7 +# define FALLTHROUGH ((void) 0) +# else +# define FALLTHROUGH __attribute__ ((__fallthrough__)) +# endif +#endif diff --git a/sed/utils.c b/sed/utils.c new file mode 100644 index 0000000..6f847cd --- /dev/null +++ b/sed/utils.c @@ -0,0 +1,499 @@ +/* Functions from hack's utils library. + Copyright (C) 1989-2018 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> + +#include "binary-io.h" +#include "unlocked-io.h" +#include "utils.h" +#include "progname.h" +#include "fwriting.h" +#include "xalloc.h" + +#if O_BINARY +extern bool binary_mode; +#endif + +/* Store information about files opened with ck_fopen + so that error messages from ck_fread, ck_fwrite, etc. can print the + name of the file that had the error */ + +struct open_file + { + FILE *fp; + char *name; + struct open_file *link; + unsigned temp : 1; + }; + +static struct open_file *open_files = NULL; +static void do_ck_fclose (FILE *fp); + +/* Print an error message and exit */ + +void +panic (const char *str, ...) +{ + va_list ap; + + fprintf (stderr, "%s: ", program_name); + va_start (ap, str); + vfprintf (stderr, str, ap); + va_end (ap); + putc ('\n', stderr); + + /* Unlink the temporary files. 
*/ + while (open_files) + { + if (open_files->temp) + { + fclose (open_files->fp); + errno = 0; + unlink (open_files->name); + if (errno != 0) + fprintf (stderr, _("cannot remove %s: %s"), open_files->name, + strerror (errno)); + } + +#ifdef lint + struct open_file *next = open_files->link; + free (open_files->name); + free (open_files); + open_files = next; +#else + open_files = open_files->link; +#endif + } + + exit (EXIT_PANIC); +} + +/* Internal routine to get a filename from open_files */ +static const char * _GL_ATTRIBUTE_PURE +utils_fp_name (FILE *fp) +{ + struct open_file *p; + + for (p=open_files; p; p=p->link) + if (p->fp == fp) + return p->name; + if (fp == stdin) + return "stdin"; + else if (fp == stdout) + return "stdout"; + else if (fp == stderr) + return "stderr"; + + return "<unknown>"; +} + +static void +register_open_file (FILE *fp, const char *name) +{ + struct open_file *p; + for (p=open_files; p; p=p->link) + { + if (fp == p->fp) + { + free (p->name); + break; + } + } + if (!p) + { + p = XCALLOC (1, struct open_file); + p->link = open_files; + open_files = p; + } + p->name = xstrdup (name); + p->fp = fp; + p->temp = false; +} + +/* Panic on failing fopen */ +FILE * +ck_fopen (const char *name, const char *mode, int fail) +{ + FILE *fp; + + fp = fopen (name, mode); + if (!fp) + { + if (fail) + panic (_("couldn't open file %s: %s"), name, strerror (errno)); + + return NULL; + } + + register_open_file (fp, name); + return fp; +} + +/* Panic on failing fdopen */ +FILE * +ck_fdopen ( int fd, const char *name, const char *mode, int fail) +{ + FILE *fp; + + fp = fdopen (fd, mode); + if (!fp) + { + if (fail) + panic (_("couldn't attach to %s: %s"), name, strerror (errno)); + + return NULL; + } + + register_open_file (fp, name); + return fp; +} + +FILE * +ck_mkstemp (char **p_filename, const char *tmpdir, + const char *base, const char *mode) +{ + char *template = xmalloc (strlen (tmpdir) + strlen (base) + 8); + sprintf (template, "%s/%sXXXXXX", tmpdir, 
base); + + /* The ownership might change, so omit some permissions at first + so unauthorized users cannot nip in before the file is ready. + mkstemp forces O_BINARY on cygwin, so use mkostemp instead. */ + mode_t save_umask = umask (0700); + int fd = mkostemp (template, 0); + umask (save_umask); + if (fd == -1) + panic (_("couldn't open temporary file %s: %s"), template, + strerror (errno)); +#if O_BINARY + if (binary_mode && (set_binary_mode ( fd, O_BINARY) == -1)) + panic (_("failed to set binary mode on '%s'"), template); +#endif + + *p_filename = template; + FILE *fp = fdopen (fd, mode); + register_open_file (fp, template); + return fp; +} + +/* Panic on failing fwrite */ +void +ck_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + clearerr (stream); + if (size && fwrite (ptr, size, nmemb, stream) != nmemb) + panic (ngettext ("couldn't write %llu item to %s: %s", + "couldn't write %llu items to %s: %s", nmemb), + (unsigned long long) nmemb, utils_fp_name (stream), + strerror (errno)); +} + +/* Panic on failing fread */ +size_t +ck_fread (void *ptr, size_t size, size_t nmemb, FILE *stream) +{ + clearerr (stream); + if (size && (nmemb=fread (ptr, size, nmemb, stream)) <= 0 && ferror (stream)) + panic (_("read error on %s: %s"), utils_fp_name (stream), strerror (errno)); + + return nmemb; +} + +size_t +ck_getdelim (char **text, size_t *buflen, char buffer_delimiter, FILE *stream) +{ + ssize_t result; + bool error; + + error = ferror (stream); + if (!error) + { + result = getdelim (text, buflen, buffer_delimiter, stream); + error = ferror (stream); + } + + if (error) + panic (_("read error on %s: %s"), utils_fp_name (stream), strerror (errno)); + + return result; +} + +/* Panic on failing fflush */ +void +ck_fflush (FILE *stream) +{ + if (!fwriting (stream)) + return; + + clearerr (stream); + if (fflush (stream) == EOF && errno != EBADF) + panic ("couldn't flush %s: %s", utils_fp_name (stream), strerror (errno)); +} + +/* Panic on failing 
fclose */ +void +ck_fclose (FILE *stream) +{ + struct open_file r; + struct open_file *prev; + struct open_file *cur; + + /* a NULL stream means to close all files */ + r.link = open_files; + prev = &r; + while ( (cur = prev->link) ) + { + if (!stream || stream == cur->fp) + { + do_ck_fclose (cur->fp); + prev->link = cur->link; + free (cur->name); + free (cur); + } + else + prev = cur; + } + + open_files = r.link; + + /* Also care about stdout, because if it is redirected the + last output operations might fail and it is important + to signal this as an error (perhaps to make). */ + if (!stream) + do_ck_fclose (stdout); +} + +/* Close a single file. */ +void +do_ck_fclose (FILE *fp) +{ + ck_fflush (fp); + clearerr (fp); + + if (fclose (fp) == EOF) + panic ("couldn't close %s: %s", utils_fp_name (fp), strerror (errno)); +} + +/* Follow symlink and panic if something fails. Return the ultimate + symlink target, stored in a temporary buffer that the caller should + not free. */ +const char * +follow_symlink (const char *fname) +{ +#ifdef ENABLE_FOLLOW_SYMLINKS + static char *buf1, *buf2; + static int buf_size; + + struct stat statbuf; + const char *buf = fname, *c; + int rc; + + if (buf_size == 0) + { + buf1 = xzalloc (PATH_MAX + 1); + buf2 = xzalloc (PATH_MAX + 1); + buf_size = PATH_MAX + 1; + } + + while ((rc = lstat (buf, &statbuf)) == 0 + && (statbuf.st_mode & S_IFLNK) == S_IFLNK) + { + if (buf == buf2) + { + strcpy (buf1, buf2); + buf = buf1; + } + + while ((rc = readlink (buf, buf2, buf_size)) == buf_size) + { + buf_size *= 2; + buf1 = xrealloc (buf1, buf_size); + buf2 = xrealloc (buf2, buf_size); + } + if (rc < 0) + panic (_("couldn't follow symlink %s: %s"), buf, strerror (errno)); + else + buf2 [rc] = '\0'; + + if (buf2[0] != '/' && (c = strrchr (buf, '/')) != NULL) + { + /* Need to handle relative paths with care. Reallocate buf1 and + buf2 to be big enough. 
*/ + int len = c - buf + 1; + if (len + rc + 1 > buf_size) + { + buf_size = len + rc + 1; + buf1 = xrealloc (buf1, buf_size); + buf2 = xrealloc (buf2, buf_size); + } + + /* Always store the new path in buf1. */ + if (buf != buf1) + memcpy (buf1, buf, len); + + /* Tack the relative symlink at the end of buf1. */ + memcpy (buf1 + len, buf2, rc + 1); + buf = buf1; + } + else + { + /* Use buf2 as the buffer, it saves a strcpy if it is not pointing to + another link. It works for absolute symlinks, and as long as + symlinks do not leave the current directory. */ + buf = buf2; + } + } + + if (rc < 0) + panic (_("cannot stat %s: %s"), buf, strerror (errno)); + + return buf; +#else + return fname; +#endif /* ENABLE_FOLLOW_SYMLINKS */ +} + +/* Panic on failing rename */ +void +ck_rename (const char *from, const char *to, const char *unlink_if_fail) +{ + int rd = rename (from, to); + if (rd != -1) + return; + + if (unlink_if_fail) + { + int save_errno = errno; + errno = 0; + unlink (unlink_if_fail); + + /* Failure to remove the temporary file is more severe, + so trigger it first. */ + if (errno != 0) + panic (_("cannot remove %s: %s"), unlink_if_fail, strerror (errno)); + + errno = save_errno; + } + + panic (_("cannot rename %s: %s"), from, strerror (errno)); +} + + + + +/* Implement a variable sized buffer of `stuff'. We don't know what it is, +nor do we care, as long as it doesn't mind being aligned by malloc. 
*/ + +struct buffer + { + size_t allocated; + size_t length; + char *b; + }; + +#define MIN_ALLOCATE 50 + +struct buffer * +init_buffer (void) +{ + struct buffer *b = XCALLOC (1, struct buffer); + b->b = XCALLOC (MIN_ALLOCATE, char); + b->allocated = MIN_ALLOCATE; + b->length = 0; + return b; +} + +char * +get_buffer (struct buffer const *b) +{ + return b->b; +} + +size_t +size_buffer (struct buffer const *b) +{ + return b->length; +} + +static void +resize_buffer (struct buffer *b, size_t newlen) +{ + char *try = NULL; + size_t alen = b->allocated; + + if (newlen <= alen) + return; + alen *= 2; + if (newlen < alen) + try = realloc (b->b, alen); /* Note: *not* the REALLOC() macro! */ + if (!try) + { + alen = newlen; + try = REALLOC (b->b, alen, char); + } + b->allocated = alen; + b->b = try; +} + +char * +add_buffer (struct buffer *b, const char *p, size_t n) +{ + char *result; + if (b->allocated - b->length < n) + resize_buffer (b, b->length+n); + result = memcpy (b->b + b->length, p, n); + b->length += n; + return result; +} + +char * +add1_buffer (struct buffer *b, int c) +{ + /* This special case should be kept cheap; + * don't make it just a mere convenience + * wrapper for add_buffer() -- even "builtin" + * versions of memcpy(a, b, 1) can become + * expensive when called too often. + */ + if (c != EOF) + { + char *result; + if (b->allocated - b->length < 1) + resize_buffer (b, b->length+1); + result = b->b + b->length++; + *result = c; + return result; + } + + return NULL; +} + +void +free_buffer (struct buffer *b) +{ + if (b) + free (b->b); + free (b); +} diff --git a/sed/utils.h b/sed/utils.h new file mode 100644 index 0000000..810ac9f --- /dev/null +++ b/sed/utils.h @@ -0,0 +1,52 @@ +/* Functions from hack's utils library. + Copyright (C) 1989-2018 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; If not, see <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> + +#include "basicdefs.h" + +enum exit_codes { + /* EXIT_SUCCESS is already defined as 0 */ + EXIT_BAD_USAGE = 1, /* bad program syntax, invalid command-line options */ + EXIT_BAD_INPUT = 2, /* failed to open some of the input files */ + EXIT_PANIC = 4 /* PANIC during program execution */ +}; + + +_Noreturn void panic (const char *str, ...) _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2); + +FILE *ck_fopen (const char *name, const char *mode, int fail); +FILE *ck_fdopen (int fd, const char *name, const char *mode, int fail); +void ck_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream); +size_t ck_fread (void *ptr, size_t size, size_t nmemb, FILE *stream); +void ck_fflush (FILE *stream); +void ck_fclose (FILE *stream); +const char *follow_symlink (const char *path); +size_t ck_getdelim (char **text, size_t *buflen, char buffer_delimiter, + FILE *stream); +FILE * ck_mkstemp (char **p_filename, const char *tmpdir, const char *base, + const char *mode) _GL_ARG_NONNULL ((1, 2, 3, 4)); +void ck_rename (const char *from, const char *to, const char *unlink_if_fail); + +void *ck_malloc (size_t size); +void *ck_realloc (void *ptr, size_t size); + +struct buffer *init_buffer (void); +char *get_buffer (struct buffer const *b) _GL_ATTRIBUTE_PURE; +size_t size_buffer (struct buffer const *b) _GL_ATTRIBUTE_PURE; +char *add_buffer (struct buffer 
*b, const char *p, size_t n); +char *add1_buffer (struct buffer *b, int ch); +void free_buffer (struct buffer *b); |