summary | refs | log | tree | commit | diff
path: root/sed
diff options
context:
space:
mode:
Diffstat (limited to 'sed')
-rw-r--r-- sed/compile.c 1689
-rw-r--r-- sed/debug.c 456
-rw-r--r-- sed/execute.c 1705
-rw-r--r-- sed/local.mk 58
-rw-r--r-- sed/mbcs.c 76
-rw-r--r-- sed/regexp.c 379
-rw-r--r-- sed/sed.c 388
-rw-r--r-- sed/sed.h 293
-rw-r--r-- sed/utils.c 499
-rw-r--r-- sed/utils.h 52
10 files changed, 5595 insertions, 0 deletions
diff --git a/sed/compile.c b/sed/compile.c
new file mode 100644
index 0000000..ca58371
--- /dev/null
+++ b/sed/compile.c
@@ -0,0 +1,1689 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+/* compile.c: translate sed source into internal form */
+
+#include "sed.h"
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <obstack.h>
+#include "progname.h"
+#include "xalloc.h"
+
+#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
+#define VECTOR_ALLOC_INCREMENT 40
+
+/* let's not confuse text editors that have only dumb bracket-matching... */
+#define OPEN_BRACKET '['
+#define CLOSE_BRACKET ']'
+/* #define OPEN_BRACE '{' */
+#define CLOSE_BRACE '}'
+
+struct prog_info { /* Where script text is currently read from: an in-memory string or a FILE. */
+ /* When we're reading a script command from a string, `prog.base'
+ points to the first character in the string, 'prog.cur' points
+ to the current character in the string, and 'prog.end' points
+ to the end of the string. This allows us to compile script
+ strings that contain nulls. */
+ const unsigned char *base; /* start of the string; bad_prog reports offsets as cur - base */
+ const unsigned char *cur; /* next byte inchar will return; non-NULL selects string mode */
+ const unsigned char *end; /* inchar only reads while cur < end, so this is one past the last byte */
+
+ /* This is the current script file. If it is NULL, we are reading
+ from a string stored at `prog.cur' instead. If both `prog.file'
+ and `prog.cur' are NULL, we're in trouble! */
+ FILE *file; /* only consulted by inchar/savchar when `cur' is NULL */
+};
+
+/* Information used to give out useful and informative error messages (see bad_prog). */
+struct error_info {
+ /* This is the name of the current script file. When it is NULL, bad_prog reports a "-e expression" position instead. */
+ const char *name;
+
+ /* This is the number of the current script line that we're compiling; inchar bumps it on each newline and savchar undoes that. */
+ countT line;
+
+ /* This is the index of the "-e" expressions on the command line, printed as "-e expression #N". */
+ countT string_expr_count;
+};
+
+
+/* Label structure used to resolve jumps (`b', `t', `T'), labels (`:'), and block beginnings (`{'). Nodes are created by setup_label and popped by release_label. */
+struct sed_label {
+ countT v_index; /* index of vector element being referenced */
+ char *name; /* NUL-terminated name of the label; NULL for `{' blocks; released with free() in release_label */
+ struct error_info err_info; /* track where `{}' blocks start; only filled in when setup_label is given a non-NULL err_info */
+ struct sed_label *next; /* linked list (stack) */
+};
+
+struct special_files { /* One well-known file name that get_openfile maps onto an already-open stdio stream. */
+ struct output outf; /* output record handed back to the caller; its fp is filled in at lookup time */
+ FILE **pfp; /* points at the my_std* variable holding the stream to use */
+};
+
+static FILE *my_stdin, *my_stdout, *my_stderr; /* assigned from stdin/stdout/stderr inside get_openfile, since those may not be constant expressions */
+static struct special_files special_files[] = { /* table scanned by get_openfile; terminated by the entry whose name is NULL */
+ { { (char *) "/dev/stdin", false, NULL, NULL }, &my_stdin },
+ { { (char *) "/dev/stdout", false, NULL, NULL }, &my_stdout },
+ { { (char *) "/dev/stderr", false, NULL, NULL }, &my_stderr },
+ { { NULL, false, NULL, NULL }, NULL }
+};
+
+/* Where we are in the processing of the input. */
+static struct prog_info prog; /* read cursor used by inchar/savchar */
+static struct error_info cur_input; /* position reported by bad_prog */
+
+/* Information about labels and jumps-to-labels. This is used to do
+ the required backpatching after we have compiled all the scripts. */
+static struct sed_label *jumps = NULL; /* one entry per `b', `t' or `T' command */
+static struct sed_label *labels = NULL; /* one entry per `:' command */
+
+/* We wish to detect #n magic only in the first input argument;
+ this flag tracks when we have consumed the first file of input. */
+static bool first_script = true;
+
+/* Allow for scripts like "sed -e 'i\' -e foo": the text of an unfinished a/i/c command is kept here between script chunks. */
+static struct buffer *pending_text = NULL; /* partial text accumulated by read_text; NULL when nothing is pending */
+static struct text_buf *old_text_buf = NULL; /* destination that read_text fills when it is later called with a NULL buf */
+
+/* Information about block start positions. This is used to backpatch
+ block end positions. */
+static struct sed_label *blocks = NULL; /* stack of open `{' commands; popped when the matching `}' is compiled */
+
+/* Use an obstack for compilation. It is initialized in compile_program when the first vector is created. */
+static struct obstack obs;
+
+/* Various error messages we may want to print, packed into one array as consecutive NUL-terminated strings. Each macro below locates its message by adding to the previous macro the sizeof (which counts the terminating NUL) of the previous message, so the literals here and inside the macros must stay identical and in the same order. */
+static const char errors[] =
+ "multiple `!'s\0"
+ "unexpected `,'\0"
+ "invalid usage of +N or ~N as first address\0"
+ "unmatched `{'\0"
+ "unexpected `}'\0"
+ "extra characters after command\0"
+ "expected \\ after `a', `c' or `i'\0"
+ "`}' doesn't want any addresses\0"
+ ": doesn't want any addresses\0"
+ "comments don't accept any addresses\0"
+ "missing command\0"
+ "command only uses one address\0"
+ "unterminated address regex\0"
+ "unterminated `s' command\0"
+ "unterminated `y' command\0"
+ "unknown option to `s'\0"
+ "multiple `p' options to `s' command\0"
+ "multiple `g' options to `s' command\0"
+ "multiple number options to `s' command\0"
+ "number option to `s' command may not be zero\0"
+ "strings for `y' command are different lengths\0"
+ "delimiter character is not a single-byte character\0"
+ "expected newer version of sed\0"
+ "invalid usage of line address 0\0"
+ "unknown command: `%c'\0"
+ "incomplete command\0"
+ "\":\" lacks a label\0"
+ "recursive escaping after \\c not allowed\0"
+ "e/r/w commands disabled in sandbox mode\0"
+ "missing filename in r/R/w/W commands";
+
+#define BAD_BANG (errors)
+#define BAD_COMMA (BAD_BANG + sizeof (N_("multiple `!'s")))
+#define BAD_STEP (BAD_COMMA + sizeof (N_("unexpected `,'")))
+#define EXCESS_OPEN_BRACE (BAD_STEP \
+ + sizeof (N_("invalid usage of +N or ~N as first address")))
+#define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof (N_("unmatched `{'")))
+#define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof (N_("unexpected `}'")))
+#define EXPECTED_SLASH (EXCESS_JUNK \
+ + sizeof (N_("extra characters after command")))
+#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH \
+ + sizeof (N_("expected \\ after `a', `c' or `i'")))
+#define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR \
+ + sizeof (N_("`}' doesn't want any addresses")))
+#define NO_SHARP_ADDR (NO_COLON_ADDR \
+ + sizeof (N_(": doesn't want any addresses")))
+#define NO_COMMAND (NO_SHARP_ADDR \
+ + sizeof (N_("comments don't accept any addresses")))
+#define ONE_ADDR (NO_COMMAND + sizeof (N_("missing command")))
+#define UNTERM_ADDR_RE (ONE_ADDR + sizeof (N_("command only uses one address")))
+#define UNTERM_S_CMD (UNTERM_ADDR_RE \
+ + sizeof (N_("unterminated address regex")))
+#define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof (N_("unterminated `s' command")))
+#define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof (N_("unterminated `y' command")))
+#define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof (N_("unknown option to `s'")))
+#define EXCESS_G_OPT (EXCESS_P_OPT \
+ + sizeof (N_("multiple `p' options to `s' command")))
+#define EXCESS_N_OPT (EXCESS_G_OPT \
+ + sizeof (N_("multiple `g' options to `s' command")))
+#define ZERO_N_OPT (EXCESS_N_OPT \
+ + sizeof (N_("multiple number options to `s' command")))
+#define Y_CMD_LEN (ZERO_N_OPT \
+ + sizeof (N_("number option to `s' command may not be zero")))
+#define BAD_DELIM (Y_CMD_LEN \
+ + sizeof (N_("strings for `y' command are different lengths")))
+#define ANCIENT_VERSION (BAD_DELIM \
+ + sizeof (N_("delimiter character is not a single-byte character")))
+#define INVALID_LINE_0 (ANCIENT_VERSION \
+ + sizeof (N_("expected newer version of sed")))
+#define UNKNOWN_CMD (INVALID_LINE_0 \
+ + sizeof (N_("invalid usage of line address 0")))
+#define INCOMPLETE_CMD (UNKNOWN_CMD + sizeof (N_("unknown command: `%c'")))
+#define COLON_LACKS_LABEL (INCOMPLETE_CMD \
+ + sizeof (N_("incomplete command")))
+#define RECURSIVE_ESCAPE_C (COLON_LACKS_LABEL \
+ + sizeof (N_("\":\" lacks a label")))
+#define DISALLOWED_CMD (RECURSIVE_ESCAPE_C \
+ + sizeof (N_("recursive escaping after \\c not allowed")))
+#define MISSING_FILENAME (DISALLOWED_CMD \
+ + sizeof (N_( "e/r/w commands disabled in sandbox mode")))
+/* #define END_ERRORS (DISALLOWED_CMD \
+ + sizeof (N_( "e/r/w commands disabled in sandbox mode"))) */
+
+static struct output *file_read = NULL; /* head of the list of files opened for the `R' command (see get_openfile) */
+static struct output *file_write = NULL; /* head of the list of files opened for `w', `W' and `s///w' (see get_openfile) */
+
+/* Complain about an unknown command and exit. CH is the offending command character; it is substituted for the `%c' in the (translated) UNKNOWN_CMD message, which is then passed to bad_prog. Does not return. */
+static void
+bad_command (char ch)
+{
+ const char *msg = _(UNKNOWN_CMD);
+ char *unknown_cmd = xmalloc (strlen (msg) + 1); /* room for the terminating NUL even if the expansion is as long as MSG itself */
+ sprintf (unknown_cmd, msg, ch);
+ bad_prog (unknown_cmd); /* exits, so the buffer is never freed */
+}
+
+/* Complain about a programming error and exit. WHY is the already-formatted message; it is printed to stderr prefixed with the current script position, then the process exits with EXIT_BAD_USAGE. */
+void
+bad_prog (const char *why)
+{
+ if (cur_input.name) /* compiling a script file: report file name and line */
+ fprintf (stderr, _("%s: file %s line %lu: %s\n"), program_name,
+ cur_input.name, (unsigned long)cur_input.line, why);
+ else /* compiling a -e string: report expression index and byte offset */
+ fprintf (stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
+ program_name,
+ (unsigned long)cur_input.string_expr_count,
+ (unsigned long)(prog.cur-prog.base),
+ why);
+ exit (EXIT_BAD_USAGE);
+}
+
+/* Read the next character from the program. Return EOF if there isn't
+ anything to read. Keep cur_input.line up to date, so error messages
+ can be meaningful. Input comes from the string at prog.cur when that is non-NULL, otherwise from prog.file. */
+static int
+inchar (void)
+{
+ int ch = EOF;
+
+ if (prog.cur)
+ {
+ if (prog.cur < prog.end) /* string exhausted => stay at EOF */
+ ch = *prog.cur++;
+ }
+ else if (prog.file)
+ {
+ if (!feof (prog.file)) /* do not read again once EOF has been seen */
+ ch = getc (prog.file);
+ }
+ if (ch == '\n')
+ ++cur_input.line;
+ return ch;
+}
+
+/* unget `ch' so the next call to inchar will return it. Pushing back EOF is a no-op; pushing back a newline also undoes the line count increment done by inchar. */
+static void
+savchar (int ch)
+{
+ if (ch == EOF)
+ return;
+ if (ch == '\n' && cur_input.line > 0)
+ --cur_input.line;
+ if (prog.cur)
+ {
+ if (prog.cur <= prog.base || *--prog.cur != ch) /* string mode: just step the cursor back, and insist that CH is the byte that was really there */
+ panic ("Called savchar with unexpected pushback (%x)",
+ (unsigned int) ch);
+ }
+ else
+ ungetc (ch, prog.file); /* file mode: rely on stdio pushback */
+}
+
+/* Read the next non-blank character from the program, i.e. the first character returned by inchar for which ISBLANK is false (this may be EOF or a newline). */
+static int
+in_nonblank (void)
+{
+ int ch;
+ do
+ ch = inchar ();
+ while (ISBLANK (ch));
+ return ch;
+}
+
+/* Consume script input until a valid end of command marker is found:
+ comment, closing brace, newline, semicolon or EOF.
+ If any other character is found, die with 'extra characters after command'
+ error. Leading blanks are skipped first. A closing brace or `#' is pushed back so the main loop compiles it as the next command; newline and `;' are consumed.
+*/
+static void
+read_end_of_cmd (void)
+{
+ const int ch = in_nonblank ();
+ if (ch == CLOSE_BRACE || ch == '#')
+ savchar (ch);
+ else if (ch != EOF && ch != '\n' && ch != ';')
+ bad_prog (_(EXCESS_JUNK));
+}
+
+/* Read an integer value from the program. CH is the first character (already read by the caller); further decimal digits are pulled with inchar. The first non-digit is pushed back. Returns 0 when CH is not a digit. NOTE(review): there is no overflow check on the accumulated value. */
+static countT
+in_integer (int ch)
+{
+ countT num = 0;
+
+ while (ISDIGIT (ch))
+ {
+ num = num * 10 + ch - '0';
+ ch = inchar ();
+ }
+ savchar (ch); /* the terminator belongs to whatever follows the number */
+ return num;
+}
+
+static int /* Append CH to buffer B, then return the next character of the program (as inchar does). */
+add_then_next (struct buffer *b, int ch)
+{
+ add1_buffer (b, ch);
+ return inchar ();
+}
+
+static char * /* Parse the digits that follow BUF[0] (scanning BUF+1 up to BUFEND) as a number in BASE and store it, as a single char, in *RESULT. If no valid digit follows, *RESULT gets BUF[0] itself. Returns a pointer to the first character not consumed. */
+convert_number (char *result, char *buf, const char *bufend, int base)
+{
+ int n = 0;
+ int max = 1; /* multiplied by BASE per digit; the loop stops taking digits once it exceeds 255 */
+ char *p;
+
+ for (p=buf+1; p < bufend && max <= 255; ++p, max *= base)
+ {
+ int d = -1; /* stays -1 for anything that is not a hex digit */
+ switch (*p)
+ {
+ case '0': d = 0x0; break;
+ case '1': d = 0x1; break;
+ case '2': d = 0x2; break;
+ case '3': d = 0x3; break;
+ case '4': d = 0x4; break;
+ case '5': d = 0x5; break;
+ case '6': d = 0x6; break;
+ case '7': d = 0x7; break;
+ case '8': d = 0x8; break;
+ case '9': d = 0x9; break;
+ case 'A': case 'a': d = 0xa; break;
+ case 'B': case 'b': d = 0xb; break;
+ case 'C': case 'c': d = 0xc; break;
+ case 'D': case 'd': d = 0xd; break;
+ case 'E': case 'e': d = 0xe; break;
+ case 'F': case 'f': d = 0xf; break;
+ }
+ if (d < 0 || base <= d) /* not a digit, or not a digit of this base */
+ break;
+ n = n * base + d;
+ }
+ if (p == buf+1) /* no digits were consumed */
+ *result = *buf;
+ else
+ *result = n;
+ return p;
+}
+
+/* Read in a filename for a `r', `w', or `s///w' command. Fails with DISALLOWED_CMD in sandbox mode. Leading blanks are skipped, then everything up to (not including) the next newline or EOF is taken as the name. Returns a new buffer holding the NUL-terminated name (possibly empty); the caller releases it with free_buffer. */
+static struct buffer *
+read_filename (void)
+{
+ struct buffer *b;
+ int ch;
+
+ if (sandbox)
+ bad_prog (_(DISALLOWED_CMD));
+
+ b = init_buffer ();
+ ch = in_nonblank ();
+ while (ch != EOF && ch != '\n')
+ {
+#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
+ if (posixicity == POSIXLY_EXTENDED)
+ if (ch == ';' || ch == '#')
+ {
+ savchar (ch);
+ break;
+ }
+#endif
+ ch = add_then_next (b, ch);
+ }
+ add1_buffer (b, '\0'); /* so callers can treat get_buffer (b) as a C string */
+ return b;
+}
+
+static struct output * /* Read a file name from the script and return the output record for it, opening the file on first use. FILE_PTRS is the list (file_read or file_write) to search and extend; MODE and FAIL are passed straight to ck_fopen. An empty name is a MISSING_FILENAME error. */
+get_openfile (struct output **file_ptrs, const char *mode, int fail)
+{
+ struct buffer *b;
+ char *file_name;
+ struct output *p;
+
+ b = read_filename ();
+ file_name = get_buffer (b);
+ if (strlen (file_name) == 0)
+ bad_prog (_(MISSING_FILENAME));
+
+ for (p=*file_ptrs; p; p=p->link) /* reuse an already-open file with the same name; p ends up NULL if there is none */
+ if (strcmp (p->name, file_name) == 0)
+ break;
+
+ if (posixicity == POSIXLY_EXTENDED)
+ {
+ /* Check whether it is a special file (stdin, stdout or stderr) */
+ struct special_files *special = special_files;
+
+ /* std* sometimes are not constants, so they
+ cannot be used in the initializer for special_files */
+ my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
+ for (special = special_files; special->outf.name; special++)
+ if (strcmp (special->outf.name, file_name) == 0)
+ {
+ special->outf.fp = *special->pfp;
+ free_buffer (b);
+ return &special->outf; /* static table entry, not linked into *file_ptrs */
+ }
+ }
+
+ if (!p)
+ {
+ p = OB_MALLOC (&obs, 1, struct output);
+ p->name = xstrdup (file_name); /* file_name lives in B, which is freed below */
+ p->fp = ck_fopen (p->name, mode, fail);
+ p->missing_newline = false;
+ p->link = *file_ptrs; /* push onto the front of the list */
+ *file_ptrs = p;
+ }
+ free_buffer (b);
+ return p;
+}
+
+static struct sed_cmd * /* Return a pointer to the next free command slot of *VECTORP, growing the array by VECTOR_ALLOC_INCREMENT entries when it is full. The slot's addresses, range state, bang flag and command byte are reset; v_length is NOT advanced here (compile_program does that once the command is complete). */
+next_cmd_entry (struct vector **vectorp)
+{
+ struct sed_cmd *cmd;
+ struct vector *v;
+
+ v = *vectorp;
+ if (v->v_length == v->v_allocated)
+ {
+ v->v_allocated += VECTOR_ALLOC_INCREMENT;
+ v->v = REALLOC (v->v, v->v_allocated, struct sed_cmd); /* may move the array, so earlier sed_cmd pointers become stale */
+ }
+
+ cmd = v->v + v->v_length;
+ cmd->a1 = NULL;
+ cmd->a2 = NULL;
+ cmd->range_state = RANGE_INACTIVE;
+ cmd->addr_bang = false;
+ cmd->cmd = '\0'; /* something invalid, to catch bugs early */
+
+ *vectorp = v;
+ return cmd;
+}
+
+static int /* Copy the body of a bracket expression from the program into B; the caller has already consumed and stored the opening bracket. Returns the character that ended the scan WITHOUT storing it: the closing bracket on success, or EOF / newline if the expression is unterminated. CUR_STAT is the multibyte state handed to IS_MB_CHAR. */
+snarf_char_class (struct buffer *b, mbstate_t *cur_stat)
+{
+ int ch;
+ int state = 0;
+ int delim IF_LINT ( = 0) ; /* the `.', `:' or `=' that opened the current element; only read in state 2, after it has been set */
+
+ ch = inchar ();
+ if (ch == '^') /* a leading `^' is copied as-is */
+ ch = add_then_next (b, ch);
+ if (ch == CLOSE_BRACKET) /* a `]' in first position is copied as an ordinary member and does not end the expression */
+ ch = add_then_next (b, ch);
+
+ /* States are:
+ 0 outside a collation element, character class or collation class
+ 1 after the bracket
+ 2 after the opening ./:/=
+ 3 after the closing ./:/= */
+
+ for (;; ch = add_then_next (b, ch)) /* every `continue' and every fall out of the switch stores CH and fetches the next one */
+ {
+ const int mb_char = IS_MB_CHAR (ch, cur_stat); /* presumably non-zero when CH is part of a multibyte character; such bytes are copied without interpretation */
+
+ switch (ch)
+ {
+ case EOF:
+ case '\n':
+ return ch;
+
+ case '.':
+ case ':':
+ case '=':
+ if (mb_char)
+ continue;
+
+ if (state == 1)
+ {
+ delim = ch;
+ state = 2;
+ }
+ else if (state == 2 && ch == delim)
+ state = 3;
+ else
+ break;
+
+ continue;
+
+ case OPEN_BRACKET:
+ if (mb_char)
+ continue;
+
+ if (state == 0)
+ state = 1;
+ continue;
+
+ case CLOSE_BRACKET:
+ if (mb_char)
+ continue;
+
+ if (state == 0 || state == 1)
+ return ch;
+ else if (state == 3)
+ state = 0;
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Getting a character different from .=: whilst in state 1
+ goes back to state 0, getting a character different from ]
+ whilst in state 3 goes back to state 2. */
+ state &= ~1;
+ }
+}
+
+static struct buffer * /* Read program text up to the next unescaped delimiter SLASH and return it (without the delimiter) in a new buffer that the caller must free_buffer. Returns NULL, after freeing the buffer, if EOF or a newline is reached first; a newline is pushed back. REGEX non-zero means the text is a regular expression: `\n' becomes a newline and bracket expressions are copied verbatim via snarf_char_class. */
+match_slash (int slash, int regex)
+{
+ struct buffer *b;
+ int ch;
+ mbstate_t cur_stat = { 0, };
+
+ /* We allow only 1 byte characters for a slash. */
+ if (IS_MB_CHAR (slash, &cur_stat))
+ bad_prog (_(BAD_DELIM)); /* translated like every other bad_prog message in this file */
+
+ memset (&cur_stat, 0, sizeof cur_stat); /* restart the multibyte state for the text itself */
+
+ b = init_buffer ();
+ while ((ch = inchar ()) != EOF && ch != '\n')
+ {
+ const int mb_char = IS_MB_CHAR (ch, &cur_stat);
+
+ if (!mb_char)
+ {
+ if (ch == slash)
+ return b;
+ else if (ch == '\\')
+ {
+ ch = inchar ();
+ if (ch == EOF)
+ break;
+ else if (ch == 'n' && regex)
+ ch = '\n';
+ else if (ch != '\n' && (ch != slash || (!regex && ch == '&'))) /* keep the backslash, except before a newline or an escaped delimiter (an escaped `&' delimiter in replacement text keeps it) */
+ add1_buffer (b, '\\');
+ }
+ else if (ch == OPEN_BRACKET && regex)
+ {
+ add1_buffer (b, ch);
+ ch = snarf_char_class (b, &cur_stat);
+ if (ch != CLOSE_BRACKET) /* EOF or newline inside the bracket expression */
+ break;
+ }
+ }
+
+ add1_buffer (b, ch);
+ }
+
+ if (ch == '\n')
+ savchar (ch); /* for proper line number in error report */
+ free_buffer (b);
+ return NULL;
+}
+
+static int /* Parse the option letters that follow `s/regex/replacement/' and record them in CMD. Returns the regex compilation flags (REG_ICASE, REG_NEWLINE) collected from the I/M options. Stops at the end of the command; any unknown option is an error. */
+mark_subst_opts (struct subst *cmd)
+{
+ int flags = 0;
+ int ch;
+
+ cmd->global = false;
+ cmd->print = false;
+ cmd->eval = false;
+ cmd->numb = 0;
+ cmd->outf = NULL;
+
+ for (;;)
+ switch ( (ch = in_nonblank ()) )
+ {
+ case 'i': /* GNU extension */
+ case 'I': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog (_(UNKNOWN_S_OPT));
+ flags |= REG_ICASE;
+ break;
+
+ case 'm': /* GNU extension */
+ case 'M': /* GNU extension */
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog (_(UNKNOWN_S_OPT));
+ flags |= REG_NEWLINE;
+ break;
+
+ case 'e':
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog (_(UNKNOWN_S_OPT));
+ cmd->eval = true;
+ break;
+
+ case 'p':
+ if (cmd->print)
+ bad_prog (_(EXCESS_P_OPT));
+ cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
+ break;
+
+ case 'g':
+ if (cmd->global)
+ bad_prog (_(EXCESS_G_OPT));
+ cmd->global = true;
+ break;
+
+ case 'w': /* must be the last option: get_openfile takes the rest of the line as the file name */
+ cmd->outf = get_openfile (&file_write, write_mode, true);
+ return flags;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ if (cmd->numb)
+ bad_prog (_(EXCESS_N_OPT));
+ cmd->numb = in_integer (ch);
+ if (!cmd->numb)
+ bad_prog (_(ZERO_N_OPT));
+ break;
+
+ case CLOSE_BRACE:
+ case '#':
+ savchar (ch); /* leave it for the main loop to compile as the next command */
+ FALLTHROUGH;
+ case EOF:
+ case '\n':
+ case ';':
+ return flags;
+
+ case '\r': /* accept a CR-LF line ending; a lone CR is an unknown option */
+ if (inchar () == '\n')
+ return flags;
+ FALLTHROUGH;
+
+ default:
+ bad_prog (_(UNKNOWN_S_OPT));
+ /*NOTREACHED*/
+ }
+}
+
+/* read in a label for a `:', `b', or `t' command (also used for the argument of `T' and `v'). Leading blanks are skipped; the label ends at a blank, newline, `;', closing brace, `#' or EOF, and that terminator is pushed back. Returns a freshly allocated NUL-terminated string (possibly empty) that the caller owns. */
+static char * _GL_ATTRIBUTE_MALLOC
+read_label (void)
+{
+ struct buffer *b;
+ int ch;
+ char *ret;
+
+ b = init_buffer ();
+ ch = in_nonblank ();
+
+ while (ch != EOF && ch != '\n'
+ && !ISBLANK (ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
+ ch = add_then_next (b, ch);
+
+ savchar (ch);
+ add1_buffer (b, '\0');
+ ret = xstrdup (get_buffer (b)); /* copy out before the buffer is released */
+ free_buffer (b);
+ return ret;
+}
+
+/* Store a label (or label reference) created by a `:', `b', or `t'
+ command so that the jump to/from the label can be backpatched after
+ compilation is complete, or a reference created by a `{' to be
+ backpatched when the corresponding `}' is found. Pushes a new node (allocated on the obstack) in front of LIST and returns it as the new list head. NAME is stored, not copied. ERR_INFO, when non-NULL, is copied into the node; otherwise the node's err_info is left unset. */
+static struct sed_label *
+setup_label (struct sed_label *list, countT idx, char *name,
+ const struct error_info *err_info)
+{
+ struct sed_label *ret = OB_MALLOC (&obs, 1, struct sed_label);
+ ret->v_index = idx;
+ ret->name = name;
+ if (err_info)
+ memcpy (&ret->err_info, err_info, sizeof (ret->err_info));
+ ret->next = list;
+ return ret;
+}
+
+static struct sed_label * /* Pop the head of a label list: free its name and return the rest of the list (NULL if LIST_HEAD is NULL). The node itself is not freed. */
+release_label (struct sed_label *list_head)
+{
+ struct sed_label *ret;
+
+ if (!list_head)
+ return NULL;
+ ret = list_head->next;
+
+ free (list_head->name); /* name may be NULL (block entries); free (NULL) is harmless */
+
+#if 0
+ /* We use obstacks */
+ free (list_head);
+#endif
+ return ret;
+}
+
+static struct replacement * /* Allocate (on the obstack) one piece of a replacement: LENGTH bytes of literal text at TEXT, to be emitted with case conversion TYPE. subst_id starts at -1; callers overwrite it when the piece is followed by a back-reference. The `next' link is NOT initialized here. */
+new_replacement (char *text, size_t length, enum replacement_types type)
+{
+ struct replacement *r = OB_MALLOC (&obs, 1, struct replacement);
+
+ r->prefix = text; /* points into the caller's text; not copied */
+ r->prefix_length = length;
+ r->subst_id = -1;
+ r->repl_type = type;
+
+ /* r-> next = NULL; */
+ return r;
+}
+
+static void /* Split the replacement part of an `s' command (TEXT, LENGTH bytes) into a linked list of pieces stored in SUB->replacement. Each piece is a run of literal text optionally followed by a back-reference (`\0'-`\9' or `&'). Also sets SUB->max_id to the highest back-reference number used. */
+setup_replacement (struct subst *sub, const char *text, size_t length)
+{
+ char *base; /* start of the literal run currently being collected */
+ char *p;
+ char *text_end;
+ enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS; /* repl_type applies to the next piece; save_type is what it reverts to after a one-shot \l or \u */
+ struct replacement root; /* dummy head so that the first piece needs no special case */
+ struct replacement *tail;
+
+ sub->max_id = 0;
+ base = MEMDUP (text, length, char); /* private copy: the pieces point into it and it is edited in place below */
+ length = normalize_text (base, length, TEXT_REPLACEMENT);
+
+ IF_LINT (sub->replacement_buffer = base);
+
+ text_end = base + length;
+ tail = &root;
+
+ for (p=base; p<text_end; ++p)
+ {
+ if (*p == '\\')
+ {
+ /* Preceding the backslash may be some literal text: */
+ tail = tail->next =
+ new_replacement (base, (size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+
+ /* Skip the backslash and look for a numeric back-reference,
+ or a case-munging escape if not in POSIX mode: */
+ ++p;
+ if (p == text_end) /* trailing backslash: keep it as literal text */
+ ++tail->prefix_length;
+
+ else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p))
+ {
+ p[-1] = *p; /* overwrite the backslash with the escaped character and count it as literal */
+ ++tail->prefix_length;
+ }
+
+ else
+ switch (*p)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ tail->subst_id = *p - '0';
+ if (sub->max_id < tail->subst_id)
+ sub->max_id = tail->subst_id;
+ break;
+
+ case 'L':
+ repl_type = REPL_LOWERCASE;
+ save_type = REPL_LOWERCASE;
+ break;
+
+ case 'U':
+ repl_type = REPL_UPPERCASE;
+ save_type = REPL_UPPERCASE;
+ break;
+
+ case 'E':
+ repl_type = REPL_ASIS;
+ save_type = REPL_ASIS;
+ break;
+
+ case 'l':
+ save_type = repl_type;
+ repl_type |= REPL_LOWERCASE_FIRST;
+ break;
+
+ case 'u':
+ save_type = repl_type;
+ repl_type |= REPL_UPPERCASE_FIRST;
+ break;
+
+ default: /* any other escaped character stands for itself */
+ p[-1] = *p;
+ ++tail->prefix_length;
+ }
+
+ base = p + 1;
+ }
+ else if (*p == '&')
+ {
+ /* Preceding the ampersand may be some literal text: */
+ tail = tail->next =
+ new_replacement (base, (size_t)(p - base), repl_type);
+
+ repl_type = save_type;
+ tail->subst_id = 0; /* `&' is recorded as back-reference 0 */
+ base = p + 1;
+ }
+ }
+ /* There may be some trailing literal text: */
+ if (base < text_end)
+ tail = tail->next =
+ new_replacement (base, (size_t)(text_end - base), repl_type);
+
+ tail->next = NULL; /* terminate the list (this also clears root.next when no piece was created) */
+ sub->replacement = root.next;
+}
+
+static void /* Read the text argument of an a/i/c (or e) command into BUF. A non-NULL BUF starts a fresh text; a NULL BUF continues the text left pending by an earlier call, storing into the remembered old_text_buf. LEADIN_CH is the first character already read: EOF means the text continues in the next script chunk, and a newline is not stored. */
+read_text (struct text_buf *buf, int leadin_ch)
+{
+ int ch;
+
+ /* Should we start afresh (as opposed to continue a partial text)? */
+ if (buf)
+ {
+ if (pending_text)
+ free_buffer (pending_text);
+ pending_text = init_buffer ();
+ buf->text = NULL;
+ buf->text_length = 0;
+ old_text_buf = buf;
+ }
+ /* assert(old_text_buf != NULL); */
+
+ if (leadin_ch == EOF) /* nothing more in this chunk; pending_text stays set for the next compile_program call */
+ return;
+
+ if (leadin_ch != '\n')
+ add1_buffer (pending_text, leadin_ch);
+
+ ch = inchar ();
+ while (ch != EOF && ch != '\n')
+ {
+ if (ch == '\\')
+ {
+ ch = inchar ();
+ if (ch != EOF) /* keep the backslash along with the escaped character (including an escaped newline, which continues the text) */
+ add1_buffer (pending_text, '\\');
+ }
+
+ if (ch == EOF) /* chunk ended right after a backslash: end the line but leave the text pending */
+ {
+ add1_buffer (pending_text, '\n');
+ return;
+ }
+
+ ch = add_then_next (pending_text, ch);
+ }
+
+ add1_buffer (pending_text, '\n');
+ if (!buf)
+ buf = old_text_buf;
+ buf->text_length = normalize_text (get_buffer (pending_text),
+ size_buffer (pending_text), TEXT_BUFFER);
+ buf->text = MEMDUP (get_buffer (pending_text), buf->text_length, char);
+ free_buffer (pending_text);
+ pending_text = NULL; /* text complete */
+}
+
+/* Try to read an address for a sed command. If it succeeds,
+ return non-zero and store the resulting address in `*addr'.
+ If the input doesn't look like an address read nothing
+ and return zero. CH is the first character, already read by the caller. Recognized forms: /regex/ or \cregexc (with optional I and M modifiers), N, N~STEP, +N, ~N and $. Note that `*addr' is reset even when zero is returned. */
+static bool
+compile_address (struct addr *addr, int ch)
+{
+ addr->addr_type = ADDR_IS_NULL;
+ addr->addr_step = 0;
+ addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
+ addr->addr_regex = NULL;
+
+ if (ch == '/' || ch == '\\')
+ {
+ int flags = 0;
+ struct buffer *b;
+ addr->addr_type = ADDR_IS_REGEX;
+ if (ch == '\\') /* \cREGEXc form: the next character is the delimiter */
+ ch = inchar ();
+ if ( !(b = match_slash (ch, true)) )
+ bad_prog (_(UNTERM_ADDR_RE));
+
+ for (;;) /* collect modifier letters until something else shows up */
+ {
+ ch = in_nonblank ();
+ if (posixicity == POSIXLY_BASIC) /* no modifiers in this mode: finish immediately */
+ goto posix_address_modifier;
+ switch (ch)
+ {
+ case 'I': /* GNU extension */
+ flags |= REG_ICASE;
+ break;
+
+ case 'M': /* GNU extension */
+ flags |= REG_NEWLINE;
+ break;
+
+ default:
+ posix_address_modifier:
+ savchar (ch);
+ addr->addr_regex = compile_regex (b, flags, 0);
+ free_buffer (b);
+ return true;
+ }
+ }
+ }
+ else if (ISDIGIT (ch))
+ {
+ addr->addr_number = in_integer (ch);
+ addr->addr_type = ADDR_IS_NUM;
+ ch = in_nonblank ();
+ if (ch != '~' || posixicity == POSIXLY_BASIC)
+ {
+ savchar (ch);
+ }
+ else
+ {
+ countT step = in_integer (in_nonblank ());
+ if (step > 0) /* a step of 0 leaves this a plain line-number address */
+ {
+ addr->addr_step = step;
+ addr->addr_type = ADDR_IS_NUM_MOD;
+ }
+ }
+ }
+ else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC)
+ {
+ addr->addr_step = in_integer (in_nonblank ());
+ if (addr->addr_step==0)
+ ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
+ else if (ch == '+')
+ addr->addr_type = ADDR_IS_STEP;
+ else
+ addr->addr_type = ADDR_IS_STEP_MOD;
+ }
+ else if (ch == '$')
+ {
+ addr->addr_type = ADDR_IS_LAST;
+ }
+ else
+ return false;
+
+ return true;
+}
+
+/* Read a program (or a subprogram within `{' `}' pairs) in and store
+ the compiled form in `*vector'. Return a pointer to the new vector. A NULL VECTOR means this is the first chunk: a fresh vector is allocated and the obstack is initialized. Input is taken from the file-scope `prog' cursor until EOF; any error ends the process through bad_prog. */
+static struct vector *
+compile_program (struct vector *vector)
+{
+ struct sed_cmd *cur_cmd;
+ struct buffer *b;
+ int ch;
+
+ if (!vector)
+ {
+ vector = XCALLOC (1, struct vector);
+ vector->v = NULL;
+ vector->v_allocated = 0;
+ vector->v_length = 0;
+
+ obstack_init (&obs);
+ }
+ if (pending_text) /* a previous chunk ended inside an a/i/c text: continue it first */
+ read_text (NULL, '\n');
+
+ for (;;)
+ {
+ struct addr a;
+
+ while ((ch=inchar ()) == ';' || ISSPACE (ch)) /* skip separators between commands */
+ ;
+ if (ch == EOF)
+ break;
+
+ cur_cmd = next_cmd_entry (&vector);
+ if (compile_address (&a, ch))
+ {
+ if (a.addr_type == ADDR_IS_STEP
+ || a.addr_type == ADDR_IS_STEP_MOD)
+ bad_prog (_(BAD_STEP));
+
+ cur_cmd->a1 = MEMDUP (&a, 1, struct addr);
+ ch = in_nonblank ();
+ if (ch == ',')
+ {
+ if (!compile_address (&a, in_nonblank ()))
+ bad_prog (_(BAD_COMMA));
+
+ cur_cmd->a2 = MEMDUP (&a, 1, struct addr);
+ ch = in_nonblank ();
+ }
+
+ if ((cur_cmd->a1->addr_type == ADDR_IS_NUM /* line 0 is only accepted as the start of a `0,/regex/' range, and never in POSIX mode */
+ && cur_cmd->a1->addr_number == 0)
+ && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX)
+ || posixicity == POSIXLY_BASIC))
+ bad_prog (_(INVALID_LINE_0));
+ }
+ if (ch == '!')
+ {
+ cur_cmd->addr_bang = true;
+ ch = in_nonblank ();
+ if (ch == '!')
+ bad_prog (_(BAD_BANG));
+ }
+
+ /* Do not accept extended commands in --posix mode. Also,
+ a few commands only accept one address in that mode. */
+ if (posixicity == POSIXLY_BASIC)
+ switch (ch)
+ {
+ case 'e': case 'F': case 'v': case 'z': case 'L':
+ case 'Q': case 'T': case 'R': case 'W':
+ bad_command (ch);
+ FALLTHROUGH;
+
+ case 'a': case 'i': case 'l':
+ case '=': case 'r':
+ if (cur_cmd->a2)
+ bad_prog (_(ONE_ADDR));
+ }
+
+ cur_cmd->cmd = ch;
+ switch (ch)
+ {
+ case '#':
+ if (cur_cmd->a1)
+ bad_prog (_(NO_SHARP_ADDR));
+ ch = inchar ();
+ if (ch=='n' && first_script && cur_input.line < 2) /* "#n" as the very first two bytes of the first script turns off default output */
+ if ( (prog.base && prog.cur==2+prog.base)
+ || (prog.file && !prog.base && 2==ftell (prog.file)))
+ no_default_output = true;
+ while (ch != EOF && ch != '\n') /* discard the rest of the comment line */
+ ch = inchar ();
+ continue; /* restart the for (;;) loop */
+
+ case 'v':
+ /* This is an extension. Programs needing GNU sed might start
+ * with a `v' command so that other seds will stop.
+ * We compare the version and ignore POSIXLY_CORRECT.
+ */
+ {
+ char *version = read_label ();
+ char const *compared_version;
+ compared_version = (*version == '\0') ? "4.0" : version;
+ if (strverscmp (compared_version, PACKAGE_VERSION) > 0)
+ bad_prog (_(ANCIENT_VERSION));
+
+ free (version);
+ posixicity = POSIXLY_EXTENDED;
+ }
+ continue; /* `v' produces no command entry */
+
+ case '{':
+ blocks = setup_label (blocks, vector->v_length, NULL, &cur_input); /* remember where the block starts so `}' can backpatch it */
+ cur_cmd->addr_bang = !cur_cmd->addr_bang;
+ break;
+
+ case '}':
+ if (!blocks)
+ bad_prog (_(EXCESS_CLOSE_BRACE));
+ if (cur_cmd->a1)
+ bad_prog (_(NO_CLOSE_BRACE_ADDR));
+
+ read_end_of_cmd ();
+
+ vector->v[blocks->v_index].x.jump_index = vector->v_length; /* the matching `{' records this `}' as its jump target */
+ blocks = release_label (blocks); /* done with this entry */
+ break;
+
+ case 'e':
+ if (sandbox)
+ bad_prog (_(DISALLOWED_CMD));
+
+ ch = in_nonblank ();
+ if (ch == EOF || ch == '\n') /* `e' without a command argument */
+ {
+ cur_cmd->x.cmd_txt.text_length = 0;
+ break;
+ }
+ else
+ goto read_text_to_slash;
+
+ case 'a':
+ case 'i':
+ case 'c':
+ ch = in_nonblank ();
+
+ read_text_to_slash:
+ if (ch == EOF)
+ bad_prog (_(EXPECTED_SLASH));
+
+ if (ch == '\\')
+ ch = inchar ();
+ else
+ {
+ if (posixicity == POSIXLY_BASIC)
+ bad_prog (_(EXPECTED_SLASH));
+ savchar (ch); /* one-line form: the text starts right here */
+ ch = '\n';
+ }
+
+ read_text (&cur_cmd->x.cmd_txt, ch);
+ break;
+
+ case ':':
+ if (cur_cmd->a1)
+ bad_prog (_(NO_COLON_ADDR));
+ {
+ char *label = read_label ();
+ if (!*label)
+ bad_prog (_(COLON_LACKS_LABEL));
+ labels = setup_label (labels, vector->v_length, label, NULL);
+
+ if (debug)
+ cur_cmd->x.label_name = xstrdup (label); /* separate copy: LABEL is owned by the labels list; xstrdup, as used elsewhere in this file, replaces an unchecked strdup */
+ }
+ break;
+
+ case 'T':
+ case 'b':
+ case 't':
+ jumps = setup_label (jumps, vector->v_length, read_label (), NULL);
+ break;
+
+ case 'Q':
+ case 'q':
+ if (cur_cmd->a2)
+ bad_prog (_(ONE_ADDR));
+ FALLTHROUGH;
+
+ case 'L':
+ case 'l':
+ ch = in_nonblank ();
+ if (ISDIGIT (ch) && posixicity != POSIXLY_BASIC)
+ {
+ cur_cmd->x.int_arg = in_integer (ch);
+ }
+ else
+ {
+ cur_cmd->x.int_arg = -1; /* no numeric argument given */
+ savchar (ch);
+ }
+
+ read_end_of_cmd ();
+ break;
+
+ case '=':
+ case 'd':
+ case 'D':
+ case 'F':
+ case 'g':
+ case 'G':
+ case 'h':
+ case 'H':
+ case 'n':
+ case 'N':
+ case 'p':
+ case 'P':
+ case 'z':
+ case 'x':
+ read_end_of_cmd (); /* these commands take no argument */
+ break;
+
+ case 'r':
+ b = read_filename ();
+ if (strlen (get_buffer (b)) == 0)
+ bad_prog (_(MISSING_FILENAME));
+ cur_cmd->x.fname = xstrdup (get_buffer (b)); /* only the name is stored; the file is not opened here */
+ free_buffer (b);
+ break;
+
+ case 'R':
+ cur_cmd->x.inf = get_openfile (&file_read, read_mode, false);
+ break;
+
+ case 'W':
+ case 'w':
+ cur_cmd->x.outf = get_openfile (&file_write, write_mode, true);
+ break;
+
+ case 's':
+ {
+ struct buffer *b2;
+ int flags;
+ int slash;
+
+ slash = inchar ();
+ if ( !(b = match_slash (slash, true)) )
+ bad_prog (_(UNTERM_S_CMD));
+ if ( !(b2 = match_slash (slash, false)) )
+ bad_prog (_(UNTERM_S_CMD));
+
+ cur_cmd->x.cmd_subst = OB_MALLOC (&obs, 1, struct subst);
+ setup_replacement (cur_cmd->x.cmd_subst,
+ get_buffer (b2), size_buffer (b2));
+ free_buffer (b2);
+
+ flags = mark_subst_opts (cur_cmd->x.cmd_subst); /* the options must be parsed before the regex is compiled because I and M change the flags */
+ cur_cmd->x.cmd_subst->regx =
+ compile_regex (b, flags, cur_cmd->x.cmd_subst->max_id + 1);
+ free_buffer (b);
+
+ if (cur_cmd->x.cmd_subst->eval && sandbox)
+ bad_prog (_(DISALLOWED_CMD));
+ }
+ break;
+
+ case 'y':
+ {
+ size_t len, dest_len;
+ int slash;
+ struct buffer *b2;
+ char *src_buf, *dest_buf;
+
+ slash = inchar ();
+ if ( !(b = match_slash (slash, false)) )
+ bad_prog (_(UNTERM_Y_CMD));
+ src_buf = get_buffer (b);
+ len = normalize_text (src_buf, size_buffer (b), TEXT_BUFFER);
+
+ if ( !(b2 = match_slash (slash, false)) )
+ bad_prog (_(UNTERM_Y_CMD));
+ dest_buf = get_buffer (b2);
+ dest_len = normalize_text (dest_buf, size_buffer (b2), TEXT_BUFFER);
+
+ if (mb_cur_max > 1) /* multibyte case: build a list of (source char, destination char) string pairs */
+ {
+ size_t i, j, idx, src_char_num;
+ size_t *src_lens = XCALLOC (len, size_t);
+ char **trans_pairs;
+ size_t mbclen;
+ mbstate_t cur_stat = { 0, };
+
+ /* Enumerate how many character the source buffer has. */
+ for (i = 0, j = 0; i < len;)
+ {
+ mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a single-byte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+ src_lens[j++] = mbclen;
+ i += mbclen;
+ }
+ src_char_num = j;
+
+ memset (&cur_stat, 0, sizeof cur_stat);
+ idx = 0;
+
+ /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
+ src(i) : pointer to i-th source character.
+ dest(i) : pointer to i-th destination character.
+ NULL : terminator */
+ trans_pairs = XCALLOC (2 * src_char_num + 1, char*);
+ cur_cmd->x.translatemb = trans_pairs;
+ for (i = 0; i < src_char_num; i++)
+ {
+ if (idx >= dest_len) /* destination has fewer characters than source */
+ bad_prog (_(Y_CMD_LEN));
+
+ /* Set the i-th source character. */
+ trans_pairs[2 * i] = XCALLOC (src_lens[i] + 1, char);
+ memcpy (trans_pairs[2 * i], src_buf, src_lens[i]);
+ trans_pairs[2 * i][src_lens[i]] = '\0';
+ src_buf += src_lens[i]; /* Forward to next character. */
+
+ /* Fetch the i-th destination character. */
+ mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a single-byte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2
+ || mbclen == 0)
+ mbclen = 1;
+
+ /* Set the i-th destination character. */
+ trans_pairs[2 * i + 1] = XCALLOC (mbclen + 1, char);
+ memcpy (trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
+ trans_pairs[2 * i + 1][mbclen] = '\0';
+ idx += mbclen; /* Forward to next character. */
+ }
+ trans_pairs[2 * i] = NULL;
+ if (idx != dest_len) /* destination has more characters than source */
+ bad_prog (_(Y_CMD_LEN));
+
+ IF_LINT (free (src_lens));
+ }
+ else /* single-byte case: a YMAP_LENGTH-entry lookup table */
+ {
+ unsigned char *translate =
+ OB_MALLOC (&obs, YMAP_LENGTH, unsigned char);
+ unsigned char *ustring = (unsigned char *)src_buf;
+
+ if (len != dest_len)
+ bad_prog (_(Y_CMD_LEN));
+
+ for (len = 0; len < YMAP_LENGTH; len++) /* start from the identity mapping (LEN is reused as the index) */
+ translate[len] = len;
+
+ while (dest_len--)
+ translate[*ustring++] = (unsigned char)*dest_buf++;
+
+ cur_cmd->x.translate = translate;
+ }
+
+ read_end_of_cmd ();
+
+ free_buffer (b);
+ free_buffer (b2);
+ }
+ break;
+
+ case EOF:
+ bad_prog (_(NO_COMMAND));
+ /*NOTREACHED*/
+
+ default:
+ bad_command (ch);
+ /*NOTREACHED*/
+ }
+
+ /* this is buried down here so that "continue" statements will miss it */
+ ++vector->v_length;
+ }
+ if (posixicity == POSIXLY_BASIC && pending_text) /* POSIX mode does not allow a text to be left unfinished at the end of a chunk */
+ bad_prog (_(INCOMPLETE_CMD));
+ return vector;
+}
+
+/* Deal with \X escapes.
+
+ Rewrite the LEN bytes starting at BUF in place, replacing the
+ backslash escapes handled below with the bytes they denote, and
+ return the length of the rewritten text.
+
+ BUFTYPE selects the variations visible in the code:
+ - for anything but TEXT_BUFFER, the backslash of an escape that is
+ not handled here is kept so that a later stage can interpret it;
+ - for TEXT_REPLACEMENT, a '&' or '\' produced by a numeric escape
+ gets a backslash put in front of it;
+ - for TEXT_REGEX (unless posixicity is POSIXLY_EXTENDED), bracket
+ expressions are tracked and escapes inside them are left alone. */
+size_t
+normalize_text (char *buf, size_t len, enum text_types buftype)
+{
+ const char *bufend = buf + len;
+ char *p = buf; /* read position */
+ char *q = buf; /* write position; never ahead of p's start */
+ char ch;
+ int base;
+
+ /* This variable prevents normalizing text within bracket
+ subexpressions when conforming to POSIX. If 0, we
+ are not within a bracket expression. If -1, we are within a
+ bracket expression but are not within [.FOO.], [=FOO=],
+ or [:FOO:]. Otherwise, this is the '.', '=', or ':'
+ respectively within these three types of subexpressions. */
+ int bracket_state = 0;
+
+ /* NOTE(review): mbclen is an int, so the comparisons against
+ (size_t) -1 and (size_t) -2 below rely on the usual integer
+ conversions of -1 and -2. */
+ int mbclen;
+ mbstate_t cur_stat = { 0, };
+
+ while (p < bufend)
+ {
+ mbclen = MBRLEN (p, bufend - p, &cur_stat);
+ if (mbclen != 1)
+ {
+ /* An invalid sequence, or a truncated multibyte character.
+ We treat it as a single-byte character. */
+ if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+ mbclen = 1;
+
+ /* Copy the whole character untouched; escapes are only looked
+ for at single-byte characters. */
+ memmove (q, p, mbclen);
+ q += mbclen;
+ p += mbclen;
+ continue;
+ }
+
+ /* A backslash that is not the last byte and is outside any tracked
+ bracket expression starts an escape; P is advanced to the
+ character that follows it. */
+ if (*p == '\\' && p+1 < bufend && bracket_state == 0)
+ switch (*++p)
+ {
+#if defined __STDC__ && __STDC__-0
+ case 'a': *q++ = '\a'; p++; continue;
+#else /* Not STDC; we'll just assume ASCII */
+ case 'a': *q++ = '\007'; p++; continue;
+#endif
+ /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
+ case 'f': *q++ = '\f'; p++; continue;
+ case '\n': /*fall through */
+ case 'n': *q++ = '\n'; p++; continue;
+ case 'r': *q++ = '\r'; p++; continue;
+ case 't': *q++ = '\t'; p++; continue;
+ case 'v': *q++ = '\v'; p++; continue;
+
+ case 'd': /* decimal byte */
+ base = 10;
+ goto convert;
+
+ case 'x': /* hexadecimal byte */
+ base = 16;
+ goto convert;
+
+ case 'o': /* octal byte */
+ base = 8;
+convert:
+ /* convert_number is defined elsewhere; presumably it parses the
+ number in BASE, stores the resulting byte in CH and returns
+ the position to continue from -- TODO confirm. */
+ p = convert_number (&ch, p, bufend, base);
+
+ /* for an ampersand in a replacement, pass the \ up one level */
+ if (buftype == TEXT_REPLACEMENT && (ch == '&' || ch == '\\'))
+ *q++ = '\\';
+ *q++ = ch;
+ continue;
+
+ case 'c':
+ if (++p < bufend)
+ {
+ /* \cX: emit the upper-cased X with bit 0x40 flipped. */
+ *q++ = toupper ((unsigned char) *p) ^ 0x40;
+ /* When X is itself a backslash it has to be written as two
+ backslashes; anything else is rejected. */
+ if (*p == '\\')
+ {
+ p++;
+ if (*p != '\\')
+ bad_prog (RECURSIVE_ESCAPE_C);
+ }
+ p++;
+ continue;
+ }
+ else
+ {
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ continue;
+ }
+
+ default:
+ /* we just pass the \ up one level for interpretation */
+ if (buftype != TEXT_BUFFER)
+ *q++ = '\\';
+ break;
+ }
+ else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
+ switch (*p)
+ {
+ case '[':
+ /* Entering a bracket expression (if not already inside one). */
+ if (!bracket_state)
+ bracket_state = -1;
+ break;
+
+ case ':':
+ case '.':
+ case '=':
+ /* "[:", "[." or "[=" inside a bracket expression: remember
+ which delimiter opened the subexpression. */
+ if (bracket_state == -1 && p[-1] == '[')
+ bracket_state = *p;
+ break;
+
+ case ']':
+ if (bracket_state == 0)
+ ;
+ else if (bracket_state == -1)
+ bracket_state = 0;
+ /* ":]", ".]" or "=]" closes the subexpression, leaving us
+ inside the enclosing bracket expression. */
+ else if (p[-2] != bracket_state && p[-1] == bracket_state)
+ bracket_state = -1;
+ break;
+ }
+
+ /* Ordinary character (or the character following a kept/dropped
+ backslash): copy it. */
+ *q++ = *p++;
+ }
+ return (size_t)(q - buf);
+}
+
+
+/* Compile the LEN-byte sed script held in STR (a script given on the
+   command line) and append its commands to CUR_PROGRAM.
+   Returns whatever compile_program returns for CUR_PROGRAM.  */
+struct vector *
+compile_string (struct vector *cur_program, char *str, size_t len)
+{
+  /* Number of string scripts handed to this function so far.  */
+  static countT string_expr_count = 0;
+  struct vector *compiled;
+
+  /* The script is read from memory, not from a file.  */
+  prog.file = NULL;
+  prog.base = (unsigned char *) str;
+  prog.cur = prog.base;
+  prog.end = prog.base + len;
+
+  /* Location information used when reporting script errors.  */
+  string_expr_count++;
+  cur_input.line = 0;
+  cur_input.name = NULL;
+  cur_input.string_expr_count = string_expr_count;
+
+  compiled = compile_program (cur_program);
+
+  /* Stop referring to the caller's buffer.  */
+  prog.end = NULL;
+  prog.cur = NULL;
+  prog.base = NULL;
+
+  first_script = false;
+  return compiled;
+}
+
+/* Compile the sed script stored in the file named CMDFILE and append
+   its commands to CUR_PROGRAM.  A CMDFILE of exactly "-" makes the
+   script come from standard input.
+   Returns whatever compile_program returns for CUR_PROGRAM.  */
+struct vector *
+compile_file (struct vector *cur_program, const char *cmdfile)
+{
+#ifdef HAVE_FOPEN_RT
+  static const char open_mode[] = "rt";
+#else
+  static const char open_mode[] = "r";
+#endif
+  struct vector *compiled;
+
+  if (cmdfile[0] == '-' && cmdfile[1] == '\0')
+    prog.file = stdin;
+  else
+    prog.file = ck_fopen (cmdfile, open_mode, true);
+
+  /* Location information used when reporting script errors.  */
+  cur_input.string_expr_count = 0;
+  cur_input.name = cmdfile;
+  cur_input.line = 1;
+
+  compiled = compile_program (cur_program);
+
+  /* Standard input is left open; a script file we opened is closed.  */
+  if (prog.file != stdin)
+    ck_fclose (prog.file);
+  prog.file = NULL;
+
+  first_script = false;
+  return compiled;
+}
+
+/* Free the file name stored in every entry of the file_read and
+   file_write lists and leave each name pointer NULL.  */
+static void
+cleanup_program_filenames (void)
+{
+  struct output *heads[2];
+  size_t which;
+
+  heads[0] = file_read;
+  heads[1] = file_write;
+
+  for (which = 0; which < 2; which++)
+    {
+      struct output *node = heads[which];
+
+      while (node)
+        {
+          /* free (NULL) does nothing, so an unset name needs no test.  */
+          free (node->name);
+          node->name = NULL;
+          node = node->link;
+        }
+    }
+}
+
+/* Perform the checks that need the whole program to have been read:
+   report a "{" that was never closed, store the text of a trailing
+   a/c/i command, and backpatch every jump with the index of its
+   label.  The jump and label lists are released along the way.  */
+void
+check_final_program (struct vector *program)
+{
+  struct sed_label *jump;
+  struct sed_label *target;
+
+  /* A block that is still open here has no matching "}".  */
+  if (blocks)
+    {
+      /* Report the error at the location recorded for that block.  */
+      memcpy (&cur_input, &blocks->err_info, sizeof (cur_input));
+      bad_prog (_(EXCESS_OPEN_BRACE));
+    }
+
+  /* Was the final command an unterminated a/c/i command?  Then keep
+     a copy of whatever text was collected for it.  */
+  if (pending_text)
+    {
+      old_text_buf->text_length = size_buffer (pending_text);
+      if (old_text_buf->text_length != 0)
+        old_text_buf->text = MEMDUP (get_buffer (pending_text),
+                                     old_text_buf->text_length, char);
+      free_buffer (pending_text);
+      pending_text = NULL;
+    }
+
+  /* Resolve each jump; release_label hands back the next list entry.  */
+  jump = jumps;
+  while (jump)
+    {
+      target = labels;
+      while (target && strcmp (target->name, jump->name) != 0)
+        target = target->next;
+
+      if (!target)
+        {
+          /* Only a jump without a label name may stay unresolved; it
+             is pointed just past the last command.  */
+          if (*jump->name)
+            panic (_("can't find label for jump to `%s'"), jump->name);
+          program->v[jump->v_index].x.jump_index = program->v_length;
+        }
+      else
+        {
+          program->v[jump->v_index].x.jump_index = target->v_index;
+        }
+
+      jump = release_label (jump);
+    }
+  jumps = NULL;
+
+  /* The labels are not needed any more.  */
+  target = labels;
+  while (target)
+    target = release_label (target);
+  labels = NULL;
+}
+
+
+/* Rewind every stream that is currently open in the file_read list.  */
+void
+rewind_read_files (void)
+{
+  struct output *entry = file_read;
+
+  while (entry)
+    {
+      if (entry->fp != NULL)
+        rewind (entry->fp);
+      entry = entry->link;
+    }
+}
+
+/* Release all resources which were allocated in this module: free the
+   recorded file names and close every stream that is still open in
+   the file_read and file_write lists.  In lint builds the regexes and
+   replacement buffers of PROGRAM and the obstack are released too.
+
+   PROGRAM is the compiled program; it is only examined when `lint'
+   is defined.  */
+void
+finish_program (struct vector *program)
+{
+  struct output *p;
+
+  cleanup_program_filenames ();
+
+  /* Close all files.  The list nodes themselves are not freed one by
+     one: they are obstack-allocated.  */
+  for (p = file_read; p; p = p->link)
+    if (p->fp)
+      ck_fclose (p->fp);
+
+  for (p = file_write; p; p = p->link)
+    if (p->fp)
+      ck_fclose (p->fp);
+
+  file_read = file_write = NULL;
+
+#ifdef lint
+  /* The index has the same unsigned type that debug_print_program uses
+     for v_length, avoiding a signed/unsigned comparison.  */
+  for (size_t i = 0; i < program->v_length; ++i)
+    {
+      const struct sed_cmd *sc = &program->v[i];
+
+      if (sc->a1 && sc->a1->addr_regex)
+        release_regex (sc->a1->addr_regex);
+      if (sc->a2 && sc->a2->addr_regex)
+        release_regex (sc->a2->addr_regex);
+
+      /* Only the 's' command owns further heap data.  */
+      if (sc->cmd == 's')
+        {
+          free (sc->x.cmd_subst->replacement_buffer);
+          if (sc->x.cmd_subst->regx)
+            release_regex (sc->x.cmd_subst->regx);
+        }
+    }
+
+  obstack_free (&obs, NULL);
+#else
+  (void)program;
+#endif /* lint */
+
+}
diff --git a/sed/debug.c b/sed/debug.c
new file mode 100644
index 0000000..9ec37b6
--- /dev/null
+++ b/sed/debug.c
@@ -0,0 +1,456 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Assaf Gordon. */
+
+/* debug.c: debugging functions */
+
+#include "sed.h"
+#include "basicdefs.h"
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <assert.h>
+
+/* Indentation level when printing the program: debug_print_command
+ lowers it before printing a '}' command and raises it after printing
+ a '{' command; debug_print_program starts it at 1 and resets it to 0. */
+static int block_level = 0;
+
+
+/* Print the single byte C to stdout in an unambiguous form.
+
+   A printable character other than a backslash is printed as is.
+   Anything else is printed as a backslash escape: \a, \f, \r, \t, \v,
+   \n and \\ for those characters, and \oNNN (three octal digits) for
+   every other byte.  */
+void
+debug_print_char (char c)
+{
+  /* Work on the byte value.  A plain `char' may be negative: such a
+     value must not be given to a <ctype.h> classification, and a direct
+     conversion to unsigned int would sign-extend it, printing e.g.
+     \o37777777600 instead of \o200.  */
+  const unsigned char uc = (unsigned char) c;
+
+  if (ISPRINT (uc) && c != '\\')
+    {
+      putchar (c);
+      return;
+    }
+
+  putchar ('\\');
+  switch (c)
+    {
+    case '\a':
+      putchar ('a');
+      break;
+    case '\f':
+      putchar ('f');
+      break;
+    case '\r':
+      putchar ('r');
+      break;
+    case '\t':
+      putchar ('t');
+      break;
+    case '\v':
+      putchar ('v');
+      break;
+    case '\n':
+      putchar ('n');
+      break;
+    case '\\':
+      putchar ('\\');
+      break;
+
+    default:
+      printf ("o%03o", (unsigned int) uc);
+      break;
+    }
+}
+
+/* Print the LEN bytes of the regex pattern PAT: a '/' is written as
+   \/ and every other byte goes through debug_print_char.  */
+static void
+debug_print_regex_pattern (const char *pat, size_t len)
+{
+  size_t i;
+
+  for (i = 0; i < len; i++)
+    {
+      if (pat[i] != '/')
+        debug_print_char (pat[i]);
+      else
+        fputs ("\\/", stdout);
+    }
+}
+
+/* Print the flag letters of regex R; nothing is printed when R is
+   NULL.  ADDR selects the upper-case letters (I, M) instead of the
+   lower-case ones (i, m) for the case-insensitive and newline
+   flags.  */
+static void
+debug_print_regex_flags (const struct regex *r, bool addr)
+{
+  const char icase_letter = addr ? 'I' : 'i';
+  const char newline_letter = addr ? 'M' : 'm';
+
+  if (r == NULL)
+    return;
+
+#ifdef REG_PERL
+  if ((r->flags & REG_DOTALL) != 0) /* REG_PERL */
+    putchar ('s');
+  if ((r->flags & REG_EXTENDED) != 0) /* REG_PERL */
+    putchar ('x');
+#endif
+
+  if ((r->flags & REG_ICASE) != 0)
+    putchar (icase_letter);
+  if ((r->flags & REG_NEWLINE) != 0)
+    putchar (newline_letter);
+}
+
+/* Print regex R between slashes.  A NULL R stands for the previous
+   regex and is printed as an empty pattern, "//".  */
+static void
+debug_print_regex (const struct regex *r)
+{
+  if (r != NULL)
+    {
+      putchar ('/');
+      debug_print_regex_pattern (r->re, r->sz);
+      putchar ('/');
+    }
+  else
+    {
+      /* Previous Regex */
+      fputs ("//", stdout);
+    }
+}
+
+/* Print address A in sed script notation according to its type;
+   nothing is printed when A is NULL.  */
+static void
+debug_print_addr (const struct addr *a)
+{
+  if (a == NULL)
+    return;
+
+  switch (a->addr_type)
+    {
+    case ADDR_IS_LAST:
+      putchar ('$');
+      break;
+
+    case ADDR_IS_REGEX:
+      debug_print_regex (a->addr_regex);
+      debug_print_regex_flags (a->addr_regex, true);
+      break;
+
+    case ADDR_IS_NUM:
+      printf ("%lu", a->addr_number);
+      break;
+
+    case ADDR_IS_NUM_MOD:
+      printf ("%lu~%lu", a->addr_number, a->addr_step);
+      break;
+
+    case ADDR_IS_STEP:
+      printf ("+%lu", a->addr_step);
+      break;
+
+    case ADDR_IS_STEP_MOD:
+      printf ("~%lu", a->addr_step);
+      break;
+
+    case ADDR_IS_NULL:
+      fputs ("[ADDR-NULL]", stdout);
+      break;
+    }
+}
+
+/* Print the replacement part of an s command.  R heads a list of
+   segments; for each one print a case-conversion escape if its type
+   differs from the previously printed one, then its literal prefix,
+   then the reference it ends with ('&' for id 0, \N otherwise, nothing
+   for id -1).  */
+static void
+debug_print_subst_replacement (const struct replacement *r)
+{
+  enum replacement_types shown_type = REPL_ASIS;
+  const struct replacement *seg;
+
+  for (seg = r; seg != NULL; seg = seg->next)
+    {
+      if (seg->repl_type != shown_type)
+        {
+          /* Special GNU replacements \E\U\u\L\l should be printed
+             BEFORE the 'prefix' .... the 'prefix' refers to being
+             before the backreference. */
+          putchar ('\\');
+          if (seg->repl_type == 0)
+            putchar ('E');
+          else if (seg->repl_type == REPL_UPPERCASE)
+            putchar ('U');
+          else if (seg->repl_type == REPL_LOWERCASE)
+            putchar ('L');
+          else if ((seg->repl_type & REPL_MODIFIERS) == REPL_UPPERCASE_FIRST)
+            putchar ('u');
+          else if ((seg->repl_type & REPL_MODIFIERS) == REPL_LOWERCASE_FIRST)
+            putchar ('l');
+
+          shown_type = seg->repl_type;
+        }
+
+      if (seg->prefix_length != 0)
+        fwrite (seg->prefix, 1, seg->prefix_length, stdout);
+
+      if (seg->subst_id == -1)
+        continue;
+
+      if (seg->subst_id != 0)
+        printf ("\\%d", seg->subst_id);
+      else
+        putchar ('&');
+    }
+}
+
+/* Print the name of output file O; nothing is printed when O is NULL.  */
+static void
+debug_print_output_file (const struct output *o)
+{
+  if (o != NULL)
+    fputs (o->name, stdout);
+}
+
+/* Print the arguments of the s command described by S: the regex, the
+   replacement, the closing slash and then the flags (regex flags, g,
+   e, p, the occurrence number and w with its file name).  Nothing is
+   printed when S is NULL.  */
+static void
+debug_print_subst (const struct subst *s)
+{
+  if (s == NULL)
+    return;
+
+  /* /regex/replacement/ -- debug_print_regex emits the first two
+     slashes.  */
+  debug_print_regex (s->regx);
+  debug_print_subst_replacement (s->replacement);
+  putchar ('/');
+
+  debug_print_regex_flags (s->regx, false);
+
+  if (s->global)
+    putchar ('g');
+
+  if (s->eval)
+    putchar ('e');
+
+  if (s->print)
+    putchar ('p');
+
+  if (s->numb)
+    printf ("%lu", s->numb);
+
+  if (s->outf != NULL)
+    {
+      putchar ('w');
+      debug_print_output_file (s->outf);
+    }
+}
+
+/* Print the arguments of the y command SC as /source/dest/.
+
+   When mb_cur_max is greater than 1 the NULL-terminated array of
+   source/destination string pairs is printed.  Otherwise the 256-entry
+   byte table is used: every byte that maps to something other than
+   itself is printed in the source part and its image in the
+   destination part.  */
+static void
+debug_print_translation (const struct sed_cmd *sc)
+{
+  unsigned int n;
+
+  putchar ('/');
+
+  if (mb_cur_max > 1)
+    {
+      /* multibyte translation: even slots are sources, odd slots are
+         destinations */
+      char **pairs = sc->x.translatemb;
+
+      for (n = 0; pairs[2 * n] != NULL; n++)
+        fputs (pairs[2 * n], stdout);
+      putchar ('/');
+      for (n = 0; pairs[2 * n] != NULL; n++)
+        fputs (pairs[2 * n + 1], stdout);
+    }
+  else
+    {
+      /* unibyte translation */
+      const unsigned char *map = sc->x.translate;
+
+      for (n = 0; n < 256; ++n)
+        if (map[n] != (unsigned char) n)
+          putchar ((unsigned char) n);
+      putchar ('/');
+      for (n = 0; n < 256; ++n)
+        if (map[n] != (unsigned char) n)
+          putchar (map[n]);
+    }
+
+  putchar ('/');
+}
+
+/* Print the command letter of SC followed by its arguments, if any.
+   PROGRAM is the vector SC belongs to; it is consulted to find the
+   label that a b, t or T command jumps to.  Nothing is printed when
+   SC is NULL.  */
+static void
+debug_print_function (const struct vector *program, const struct sed_cmd *sc)
+{
+  if (!sc)
+    return;
+
+  putchar (sc->cmd);
+
+  switch (sc->cmd) /* LCOV_EXCL_BR */
+    {
+    /* Commands without arguments: the letter printed above is all.  */
+    case '=':
+    case '{':
+    case '}':
+    case 'D':
+    case 'd':
+    case 'F':
+    case 'g':
+    case 'G':
+    case 'h':
+    case 'H':
+    case 'n':
+    case 'N':
+    case 'P':
+    case 'p':
+    case 'x':
+    case 'z':
+      break;
+
+    case ':':
+      printf ("%s", sc->x.label_name);
+      break;
+
+    case '#': /* LCOV_EXCL_LINE */
+      /* should not happen - discarded during compilation. */
+      assert (0); /* LCOV_EXCL_LINE */
+      /* With NDEBUG the assertion vanishes: do not fall through and
+         print a union member that does not belong to this command.  */
+      break; /* LCOV_EXCL_LINE */
+
+    case 'a':
+    case 'c':
+    case 'i':
+      fputs ("\\", stdout);
+      if (sc->x.cmd_txt.text_length)
+        fwrite (sc->x.cmd_txt.text, 1, sc->x.cmd_txt.text_length, stdout);
+      break;
+
+    case 'b':
+    case 't':
+    case 'T':
+      /* A jump index at or beyond v_length has no command to take a
+         label name from, so no label is printed for it.  */
+      if (sc->x.jump_index < program->v_length)
+        {
+          const char *label_name = program->v[sc->x.jump_index].x.label_name;
+          if (label_name)
+            printf (" %s", label_name);
+        }
+      break;
+
+    case 'e':
+      putchar (' ');
+      /* Same guard as for a/c/i: skip fwrite when there is no text.  */
+      if (sc->x.cmd_txt.text_length)
+        fwrite (sc->x.cmd_txt.text, 1, sc->x.cmd_txt.text_length, stdout);
+      break;
+
+    case 'L':
+    case 'l':
+    case 'q':
+    case 'Q':
+      /* -1 means that no numeric argument is to be printed.  */
+      if (sc->x.int_arg != -1)
+        printf (" %d", sc->x.int_arg);
+      break;
+
+    case 'r':
+      putchar (' ');
+      fputs (sc->x.fname, stdout);
+      break;
+
+    case 'R':
+      putchar (' ');
+      fputs (sc->x.inf->name, stdout);
+      break;
+
+    case 's':
+      debug_print_subst (sc->x.cmd_subst);
+      break;
+
+    case 'v': /* LCOV_EXCL_LINE */
+      /* should not happen - handled during compilation then discarded. */
+      assert (0); /* LCOV_EXCL_LINE */
+      /* As for '#': never fall through into the 'W'/'w' case.  */
+      break; /* LCOV_EXCL_LINE */
+
+    case 'W':
+    case 'w':
+      debug_print_output_file (sc->x.outf);
+      break;
+
+    case 'y':
+      debug_print_translation (sc);
+      break;
+
+    default: /* LCOV_EXCL_LINE */
+      /* should not happen - unless missed a sed command. */
+      assert (0); /* LCOV_EXCL_LINE */
+      break; /* LCOV_EXCL_LINE */
+    }
+}
+
+/* Print one command, SC, on a line of its own: indentation for the
+   current block level, the addresses, an optional '!', and then the
+   command with its arguments.  PROGRAM is the vector SC belongs to;
+   nothing is printed when it is NULL.  */
+void
+debug_print_command (const struct vector *program, const struct sed_cmd *sc)
+{
+  bool negate;
+  int depth;
+
+  if (program == NULL)
+    return;
+
+  /* A closing brace is printed at the level of its opening brace.  */
+  if (sc->cmd == '}')
+    block_level--;
+
+  for (depth = block_level; depth > 0; depth--)
+    fputs (" ", stdout);
+
+  debug_print_addr (sc->a1);
+  if (sc->a2)
+    putchar (',');
+  debug_print_addr (sc->a2);
+
+  /* Implementation detail: GNU Sed implements beginning of block
+     by negating the matched address and jumping if there's no match.
+     Undo that negation here for a '{' command.  */
+  negate = (sc->cmd == '{') ? !sc->addr_bang : sc->addr_bang;
+  if (negate)
+    putchar ('!');
+
+  if (sc->a1 || sc->a2)
+    putchar (' ');
+
+  debug_print_function (program, sc);
+
+  putchar ('\n');
+
+  /* Commands following an opening brace are printed one level deeper.  */
+  if (sc->cmd == '{')
+    block_level++;
+}
+
+/* Print the whole of PROGRAM, one command per line, below a
+   "SED PROGRAM:" heading.  Nothing is printed when PROGRAM is NULL.  */
+void
+debug_print_program (const struct vector *program)
+{
+  size_t idx = 0;
+
+  if (program == NULL)
+    return;
+
+  /* Top-level commands are printed at indentation level 1.  */
+  block_level = 1;
+  puts ("SED PROGRAM:");
+  while (idx < program->v_length)
+    {
+      debug_print_command (program, program->v + idx);
+      idx++;
+    }
+  block_level = 0;
+}
diff --git a/sed/execute.c b/sed/execute.c
new file mode 100644
index 0000000..b39bab4
--- /dev/null
+++ b/sed/execute.c
@@ -0,0 +1,1705 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#define INITIAL_BUFFER_SIZE 50
+#define FREAD_BUFFER_SIZE 8192
+
+#include "sed.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "stat-macros.h"
+
+#include <selinux/selinux.h>
+#include <selinux/context.h>
+#include "acl.h"
+#include "ignore-value.h"
+#include "progname.h"
+#include "xalloc.h"
+
+/* The number of extra bytes that must be allocated/usable, beyond
+ the declared "end" of each line buffer that may be passed to
+ match_regex. This is imposed by its use of dfaexec.
+ Every allocation of a line's text in this file (line_init,
+ resize_line, line_copy) adds DFA_SLOP on top of the `alloc' size. */
+#define DFA_SLOP 1
+
+/* Sed operates a line at a time. */
+struct line {
+ char *text; /* Pointer to line allocated by malloc. */
+ char *active; /* Pointer to non-consumed part of text. */
+ size_t length; /* Length of text (or active, if used). */
+ size_t alloc; /* Allocated space for active. */
+ bool chomped; /* Was a trailing newline dropped? */
+ mbstate_t mbstate; /* Multibyte conversion state; must stay the last
+ member, see SIZEOF_LINE. */
+};
+
+/* Size of the part of a struct line that precedes `mbstate';
+ line_exchange copies this many bytes when the multibyte state is
+ not to be exchanged. */
+#define SIZEOF_LINE offsetof (struct line, mbstate)
+
+/* A queue of text to write out at the end of a cycle
+ (filled by the "a", "r" and "R" commands.) */
+struct append_queue {
+ const char *fname; /* If non-NULL, a file whose contents
+ dump_append_queue copies to the output. */
+ char *text; /* If non-NULL, text that dump_append_queue writes. */
+ size_t textlen; /* Number of bytes of `text' to write. */
+ struct append_queue *next; /* Next entry, or NULL at the tail. */
+ bool free; /* If true, release_append_queue frees `text'. */
+};
+
+/* State information for the input stream. */
+struct input {
+ /* The list of yet-to-be-opened files. It is invalid for file_list
+ to be NULL. When *file_list is NULL we are currently processing
+ the last file. */
+
+ char **file_list;
+
+ /* Count of files we failed to open. */
+ countT bad_count;
+
+ /* Current input line number (over all files). */
+ countT line_number;
+
+ /* True if we'll reset line numbers and addresses before
+ starting to process the next (possibly the first) file. */
+ bool reset_at_next_file;
+
+ /* Function to read one line. If FP is NULL, read_fn better not
+ be one which uses fp; in particular, read_always_fail() is
+ recommended. */
+ bool (*read_fn) (struct input *); /* read one line */
+
+ /* Name of the temporary output file used for in-place editing;
+ open_next_file obtains it from ck_mkstemp and closedown frees it. */
+ char *out_file_name;
+
+ /* Name of the current input file, as set by open_next_file (the
+ result of follow_symlink when follow_symlinks is set). */
+ const char *in_file_name;
+
+ /* Owner and mode to be set just before closing the file. */
+ struct stat st;
+
+ /* if NULL, none of the following are valid */
+ FILE *fp;
+
+ /* NOTE(review): not referenced in the part of the file visible here;
+ confirm its meaning where it is used. */
+ bool no_buffering;
+};
+
+
+/* Have we done any replacements lately? This is used by the `t' command. */
+static bool replaced = false;
+
+/* The current output file (stdout if -i is not being used). */
+static struct output output_file;
+
+/* The `current' input line. */
+static struct line line;
+
+/* An input line used to accumulate the result of the s and e commands. */
+static struct line s_accum;
+
+/* An input line that's been stored for later use by the program */
+static struct line hold;
+
+/* The buffered input look-ahead. The only field that should be
+ used outside of read_mem_line() or line_init() is buffer.length. */
+static struct line buffer;
+
+static struct append_queue *append_head = NULL;
+static struct append_queue *append_tail = NULL;
+
+/* Increase a struct line's allocation so that LEN bytes fit in its
+   active part, making some attempt at keeping realloc() calls under
+   control by padding for future growth.
+
+   LB is the line to grow; LEN is the number of bytes its active part
+   has to be able to hold.  */
+static void
+resize_line (struct line *lb, size_t len)
+{
+  /* Bytes in front of `active' that have already been consumed.  This
+     is a size_t rather than an int so that the offset is not truncated
+     and mixes cleanly with the size_t arithmetic below.  */
+  size_t inactive = lb->active - lb->text;
+
+  /* If the inactive part has got to more than two thirds of the buffer,
+     remove it. */
+  if (inactive > lb->alloc * 2)
+    {
+      memmove (lb->text, lb->active, lb->length);
+      lb->alloc += inactive;
+      lb->active = lb->text;
+      inactive = 0;
+
+      /* Reclaiming the inactive part may already have made enough room.  */
+      if (lb->alloc > len)
+        return;
+    }
+
+  lb->alloc *= 2;
+  if (lb->alloc < len)
+    lb->alloc = len;
+  if (lb->alloc < INITIAL_BUFFER_SIZE)
+    lb->alloc = INITIAL_BUFFER_SIZE;
+
+  /* `alloc' counts the active part only, hence the explicit addition of
+     the inactive prefix and of the slop bytes.  */
+  lb->text = REALLOC (lb->text, inactive + lb->alloc + DFA_SLOP, char);
+  lb->active = lb->text + inactive;
+}
+
+/* Append LENGTH bytes from STRING to the line TO, growing TO when
+   needed.  In a multibyte locale that is not UTF-8 the multibyte state
+   of TO is advanced over the appended bytes as well.  */
+static void
+str_append (struct line *to, const char *string, size_t length)
+{
+  const size_t total = to->length + length;
+  const char *cursor = string;
+  size_t remaining = length;
+
+  if (total > to->alloc)
+    resize_line (to, total);
+  memcpy (to->active + to->length, string, length);
+  to->length = total;
+
+  /* The state is only tracked for multibyte, non-UTF-8 locales.  */
+  if (mb_cur_max <= 1 || is_utf8)
+    return;
+
+  while (remaining != 0)
+    {
+      size_t n = MBRLEN (cursor, remaining, &to->mbstate);
+
+      /* Treat an invalid or incomplete sequence like a
+         single-byte character. */
+      if (n == (size_t) -1 || n == (size_t) -2)
+        {
+          memset (&to->mbstate, 0, sizeof (to->mbstate));
+          n = 1;
+        }
+
+      /* A length of zero ends the scan.  */
+      if (n == 0)
+        break;
+
+      cursor += n;
+      remaining -= n;
+    }
+}
+
+/* Append LENGTH bytes from STRING to the line TO, converting the case
+ of its characters as requested by TYPE.
+
+ REPL_ASIS appends the bytes unchanged. REPL_UPPERCASE_FIRST and
+ REPL_LOWERCASE_FIRST affect the first character only and are cleared
+ once it has been converted; after that the remaining characters are
+ upper-cased when REPL_UPPERCASE is set, lower-cased otherwise, or
+ appended unchanged if nothing but REPL_ASIS is left. */
+static void
+str_append_modified (struct line *to, const char *string, size_t length,
+ enum replacement_types type)
+{
+ mbstate_t from_stat;
+
+ if (type == REPL_ASIS)
+ {
+ str_append (to, string, length);
+ return;
+ }
+
+ /* Reserve room up front: LENGTH characters of at most mb_cur_max
+ bytes each. */
+ if (to->alloc - to->length < length * mb_cur_max)
+ resize_line (to, to->length + length * mb_cur_max);
+
+ /* Decode STRING starting from the multibyte state of TO. */
+ memcpy (&from_stat, &to->mbstate, sizeof (mbstate_t));
+ while (length)
+ {
+ wchar_t wc;
+ size_t n = MBRTOWC (&wc, string, length, &from_stat);
+
+ /* Treat an invalid sequence like a single-byte character. */
+ if (n == (size_t) -1)
+ {
+ type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
+ if (type == REPL_ASIS)
+ {
+ str_append (to, string, length);
+ return;
+ }
+
+ str_append (to, string, 1);
+ /* NOTE(review): this resets the state of TO, not from_stat,
+ although the size comes from from_stat (same type). */
+ memset (&to->mbstate, 0, sizeof (from_stat));
+ n = 1;
+ string += n, length -= n;
+ continue;
+ }
+
+ if (n == 0 || n == (size_t) -2)
+ {
+ /* L'\0' or an incomplete sequence: copy it manually. */
+ str_append (to, string, length);
+ return;
+ }
+
+ string += n, length -= n;
+
+ /* Convert the first character specially... */
+ if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
+ {
+ if (type & REPL_UPPERCASE_FIRST)
+ wc = towupper (wc);
+ else
+ wc = towlower (wc);
+
+ type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
+ if (type == REPL_ASIS)
+ {
+ /* Copy the new wide character to the end of the string. */
+ n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
+ /* The length is bumped before N is checked; on failure the
+ process aborts just below. */
+ to->length += n;
+ if (n == (size_t) -1 || n == (size_t) -2)
+ {
+ fprintf (stderr,
+ _("case conversion produced an invalid character"));
+ abort ();
+ }
+ /* Only the first character was to be converted: the rest
+ is appended unchanged. */
+ str_append (to, string, length);
+ return;
+ }
+ }
+ else if (type & REPL_UPPERCASE)
+ wc = towupper (wc);
+ else
+ wc = towlower (wc);
+
+ /* Copy the new wide character to the end of the string. */
+ n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
+ to->length += n;
+ /* N is a size_t, so -1 and -2 are converted to size_t here. */
+ if (n == -1 || n == -2)
+ {
+ fprintf (stderr, _("case conversion produced an invalid character"));
+ abort ();
+ }
+ }
+}
+
+/* Set up BUF as an empty line with room for INITIAL_SIZE bytes (plus
+   the DFA slop).  The multibyte state is copied from STATE, or
+   zeroed when STATE is null.  */
+static void
+line_init (struct line *buf, struct line *state, size_t initial_size)
+{
+  char *storage = XCALLOC (initial_size + DFA_SLOP, char);
+
+  buf->text = storage;
+  buf->active = storage;
+  buf->length = 0;
+  buf->alloc = initial_size;
+  buf->chomped = true;
+
+  if (state == NULL)
+    memset (&buf->mbstate, 0, sizeof (buf->mbstate));
+  else
+    memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
+}
+
+/* Make BUF an empty line.  A buffer that has no allocation yet is
+   initialized with the default size; otherwise only the length is
+   cleared.  The multibyte state is copied from STATE, or zeroed when
+   STATE is null.  */
+static void
+line_reset (struct line *buf, struct line *state)
+{
+  if (buf->alloc == 0)
+    {
+      line_init (buf, state, INITIAL_BUFFER_SIZE);
+      return;
+    }
+
+  buf->length = 0;
+  if (state == NULL)
+    memset (&buf->mbstate, 0, sizeof (buf->mbstate));
+  else
+    memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate));
+}
+
+/* Replace the contents of the line TO with those of the line FROM;
+   the old contents of TO are lost.  The multibyte state is copied as
+   well when STATE is nonzero.  */
+static void
+line_copy (struct line *from, struct line *to, int state)
+{
+  /* The inactive prefix of the destination becomes usable again.  */
+  to->alloc += to->active - to->text;
+
+  if (from->length > to->alloc)
+    {
+      /* Grow geometrically, but at least to the needed size and to
+         the minimal buffer size.  */
+      to->alloc *= 2;
+      if (to->alloc < from->length)
+        to->alloc = from->length;
+      if (to->alloc < INITIAL_BUFFER_SIZE)
+        to->alloc = INITIAL_BUFFER_SIZE;
+
+      /* The old text is about to be overwritten anyway, so release it
+         and allocate afresh instead of paying for realloc's copy.  */
+      free (to->text);
+      to->text = XCALLOC (to->alloc + DFA_SLOP, char);
+    }
+
+  to->active = to->text;
+  to->chomped = from->chomped;
+  to->length = from->length;
+  memcpy (to->active, from->active, from->length);
+
+  if (state != 0)
+    memcpy (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
+}
+
+/* Add a buffer delimiter and then the contents of the line FROM to
+   the end of the line TO.  TO takes over FROM's `chomped' flag and,
+   when STATE is nonzero, its multibyte state.  */
+static void
+line_append (struct line *from, struct line *to, int state)
+{
+  /* The delimiter separates the old contents from the new ones.  */
+  str_append (to, &buffer_delimiter, 1);
+  str_append (to, from->active, from->length);
+
+  to->chomped = from->chomped;
+  if (state != 0)
+    memcpy (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
+}
+
+/* Swap the contents of the two "struct line" buffers A and B.  The
+   multibyte state takes part in the swap only when STATE is
+   nonzero.  */
+static void
+line_exchange (struct line *a, struct line *b, int state)
+{
+  /* SIZEOF_LINE stops right before the mbstate member.  */
+  const size_t nbytes = state ? sizeof (struct line) : SIZEOF_LINE;
+  struct line tmp;
+
+  memcpy (&tmp, a, nbytes);
+  memcpy (a, b, nbytes);
+  memcpy (b, &tmp, nbytes);
+}
+
+/* Line reader that never delivers a line: it ignores INPUT and
+   reports failure.  Installed as read_fn when there is nothing to
+   read from, which simplifies read_pattern_space().  */
+static bool
+read_always_fail (struct input *input _GL_UNUSED)
+{
+  const bool got_line = false;
+
+  return got_line;
+}
+
+/* Read the next delimiter-terminated record from INPUT's stream and
+   append it, without the delimiter, to the current line.  Returns
+   false when nothing could be read and true otherwise.  A record that
+   does not end in the delimiter marks the line as not chomped.  */
+static bool
+read_file_line (struct input *input)
+{
+  /* Buffer handed to ck_getdelim; kept across calls.  */
+  static char *record;
+  static size_t record_size;
+  long nread;
+
+  nread = ck_getdelim (&record, &record_size, buffer_delimiter, input->fp);
+  if (nread <= 0)
+    return false;
+
+  /* Remove the trailing new-line that is left by getline. */
+  if (record[nread - 1] != buffer_delimiter)
+    line.chomped = false;
+  else
+    nread--;
+
+  str_append (&line, record, nread);
+  return true;
+}
+
+/* Write the delimiter that OUTF still owes from an earlier write
+   which did not end in one, and clear the pending flag.  */
+static inline void
+output_missing_newline (struct output *outf)
+{
+  if (!outf->missing_newline)
+    return;
+
+  ck_fwrite (&buffer_delimiter, 1, 1, outf->fp);
+  outf->missing_newline = false;
+}
+
+/* Flush FP, but only when unbuffered operation was requested.  */
+static inline void
+flush_output (FILE *fp)
+{
+  if (!unbuffered)
+    return;
+
+  ck_fflush (fp);
+}
+
+/* Write the LENGTH bytes at TEXT to OUTF, preceded by a delimiter
+   that an earlier write may still owe.  When NL is nonzero a delimiter
+   is written after the text; otherwise OUTF is marked as missing one.
+   Nothing at all happens when TEXT is NULL.  */
+static void
+output_line (const char *text, size_t length, int nl, struct output *outf)
+{
+  if (text == NULL)
+    return;
+
+  output_missing_newline (outf);
+
+  if (length != 0)
+    ck_fwrite (text, 1, length, outf->fp);
+
+  if (!nl)
+    outf->missing_newline = true;
+  else
+    ck_fwrite (&buffer_delimiter, 1, 1, outf->fp);
+
+  flush_output (outf->fp);
+}
+
+/* Allocate an empty entry, link it at the tail of the append queue
+   and return it for the caller to fill in.  */
+static struct append_queue *
+next_append_slot (void)
+{
+  struct append_queue *slot = XCALLOC (1, struct append_queue);
+
+  slot->next = NULL;
+  slot->fname = NULL;
+  slot->text = NULL;
+  slot->textlen = 0;
+  slot->free = false;
+
+  /* The first entry of an empty queue also becomes its head.  */
+  if (append_tail == NULL)
+    append_head = slot;
+  else
+    append_tail->next = slot;
+
+  append_tail = slot;
+  return slot;
+}
+
+/* Free every entry of the append queue -- and the text of those
+   entries that are flagged as owning it -- and leave the queue
+   empty.  */
+static void
+release_append_queue (void)
+{
+  struct append_queue *node = append_head;
+
+  while (node != NULL)
+    {
+      struct append_queue *following;
+
+      if (node->free)
+        free (node->text);
+
+      following = node->next;
+      free (node);
+      node = following;
+    }
+
+  append_head = append_tail = NULL;
+}
+
+/* Write out everything that is queued for appending -- the text of
+   each entry and the contents of each entry's file -- to the current
+   output file, then release the queue.  */
+static void
+dump_append_queue (void)
+{
+  struct append_queue *item;
+
+  output_missing_newline (&output_file);
+
+  for (item = append_head; item != NULL; item = item->next)
+    {
+      char chunk[FREAD_BUFFER_SIZE];
+      size_t got;
+      FILE *src;
+
+      if (item->text != NULL)
+        ck_fwrite (item->text, 1, item->textlen, output_file.fp);
+
+      if (item->fname == NULL)
+        continue;
+
+      /* "If _fname_ does not exist or cannot be read, it shall
+         be treated as if it were an empty file, causing no error
+         condition."  IEEE Std 1003.2-1992
+         So, don't fail. */
+      src = ck_fopen (item->fname, read_mode, false);
+      if (src == NULL)
+        continue;
+
+      /* Copy the file through in buffer-sized pieces.  */
+      for (;;)
+        {
+          got = ck_fread (chunk, 1, sizeof chunk, src);
+          if (got == 0)
+            break;
+          ck_fwrite (chunk, 1, got, output_file.fp);
+        }
+      ck_fclose (src);
+    }
+
+  flush_output (output_file.fp);
+  release_append_queue ();
+}
+
+/* Compute the name of the backup file for in-place editing.
+
+   The result is a copy of in_place_extension in which every asterisk
+   has been replaced by NAME.  It is allocated with xmalloc; the caller
+   is responsible for freeing it.  */
+static char *
+get_backup_file_name (const char *name)
+{
+  const char *segment;  /* start of the text not yet copied */
+  const char *star;     /* next asterisk at or after SEGMENT */
+  char *backup, *out;
+  const size_t name_length = strlen (name);
+  size_t backup_length = strlen (in_place_extension);
+
+  /* Compute the length of the backup file: each asterisk (one byte)
+     makes room for a copy of NAME.  The scan starts at the extension
+     itself; no pointer in front of the string is ever formed.  */
+  for (segment = in_place_extension;
+       (star = strchr (segment, '*')) != NULL;
+       segment = star + 1)
+    backup_length = backup_length - 1 + name_length;
+
+  out = backup = xmalloc (backup_length + 1);
+
+  /* Each iteration gobbles up to an asterisk */
+  for (segment = in_place_extension;
+       (star = strchr (segment, '*')) != NULL;
+       segment = star + 1)
+    {
+      const size_t literal_length = star - segment;
+
+      memcpy (out, segment, literal_length);
+      out += literal_length;
+      memcpy (out, name, name_length);
+      out += name_length;
+    }
+
+  /* Tack on what's after the last asterisk; this also terminates the
+     string.  */
+  strcpy (out, segment);
+  return backup;
+}
+
+/* Initialize a struct input for the named file.
+
+ NAME is opened for reading ("-" stands for standard input unless
+ editing in place) and INPUT is set up to read lines from it. If the
+ file cannot be opened, a message is printed, INPUT's bad_count is
+ incremented and its reader is set to read_always_fail.
+
+ When editing in place, a temporary file is created in the directory
+ of the input file and becomes the current output file; otherwise
+ output goes to stdout. */
+static void
+open_next_file (const char *name, struct input *input)
+{
+ /* Discard any buffered look-ahead. */
+ buffer.length = 0;
+
+ input->in_file_name = name;
+ if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
+ {
+ clearerr (stdin); /* clear any stale EOF indication */
+#if defined WIN32 || defined _WIN32 || defined __CYGWIN__ \
+ || defined MSDOS || defined __EMX__
+ input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false);
+#else
+ input->fp = stdin;
+#endif
+ }
+ else
+ {
+ if (follow_symlinks)
+ input->in_file_name = follow_symlink (name);
+
+ /* NOTE(review): the file is opened under NAME, while the resolved
+ in_file_name is what the in-place code below works with. */
+ if ( ! (input->fp = ck_fopen (name, read_mode, false)) )
+ {
+ const char *ptr = strerror (errno);
+ fprintf (stderr, _("%s: can't read %s: %s\n"), program_name,
+ name, ptr);
+ input->read_fn = read_always_fail; /* a redundancy */
+ ++input->bad_count;
+ return;
+ }
+ }
+
+ input->read_fn = read_file_line;
+
+ if (in_place_extension)
+ {
+ int input_fd;
+ char *tmpdir, *p;
+ security_context_t old_fscreatecon;
+ int reset_fscreatecon = 0;
+ memset (&old_fscreatecon, 0, sizeof (old_fscreatecon));
+
+ /* get the directory part of the input file name ("." if it has
+ none); the temporary file is created there */
+ tmpdir = xstrdup (input->in_file_name);
+ if ((p = strrchr (tmpdir, '/')))
+ *p = 0;
+ else
+ strcpy (tmpdir, ".");
+
+ if (isatty (fileno (input->fp)))
+ panic (_("couldn't edit %s: is a terminal"), input->in_file_name);
+
+ input_fd = fileno (input->fp);
+ /* NOTE(review): the return value of fstat is not checked. */
+ fstat (input_fd, &input->st);
+ if (!S_ISREG (input->st.st_mode))
+ panic (_("couldn't edit %s: not a regular file"), input->in_file_name);
+
+ if (is_selinux_enabled () > 0)
+ {
+ security_context_t con;
+ if (lgetfilecon (input->in_file_name, &con) != -1)
+ {
+ /* Save and restore the old context for the sake of w and W
+ commands. */
+ reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0;
+ if (setfscreatecon (con) < 0)
+ fprintf (stderr, _("%s: warning: failed to set default" \
+ " file creation context to %s: %s"),
+ program_name, con, strerror (errno));
+ freecon (con);
+ }
+ else
+ {
+ if (errno != ENOSYS)
+ fprintf (stderr, _("%s: warning: failed to get" \
+ " security context of %s: %s"),
+ program_name, input->in_file_name, strerror (errno));
+ }
+ }
+
+ /* The temporary file becomes the current output file; it is
+ registered so that it can be cleaned up. */
+ output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed",
+ write_mode);
+ register_cleanup_file (input->out_file_name);
+ output_file.missing_newline = false;
+ free (tmpdir);
+
+ /* Put back the file creation context saved above. */
+ if (reset_fscreatecon)
+ {
+ setfscreatecon (old_fscreatecon);
+ freecon (old_fscreatecon);
+ }
+
+ if (!output_file.fp)
+ panic (_("couldn't open temporary file %s: %s"), input->out_file_name,
+ strerror (errno));
+ }
+ else
+ {
+ if (input->fp && unbuffered)
+ setvbuf (input->fp, NULL, _IONBF, 0);
+ output_file.fp = stdout;
+ }
+}
+
+
+/* Finish with the stream currently attached to INPUT.  The reader is
+   switched to read_always_fail first, then the stream is closed.  When
+   editing in place and an output file is open, the temporary output file
+   additionally receives the owner and ACL of the original, the original is
+   optionally renamed to a backup, and the temporary file is renamed over
+   the original.  */
+static void
+closedown (struct input *input)
+{
+  input->read_fn = read_always_fail;
+  if (input->fp == NULL)
+    return;
+
+  if (!in_place_extension || output_file.fp == NULL)
+    {
+      /* Nothing to install: just close the input stream.  */
+      ck_fclose (input->fp);
+      input->fp = NULL;
+      return;
+    }
+
+  const char *orig_name = input->in_file_name;
+  int in_fd = fileno (input->fp);
+  int out_fd = fileno (output_file.fp);
+
+#ifdef HAVE_FCHOWN
+  /* First attempt to hand over both owner and group; when that is
+     refused, retry with the group alone and ignore the outcome.  */
+  if (fchown (out_fd, input->st.st_uid, input->st.st_gid) == -1)
+    ignore_value (fchown (out_fd, -1, input->st.st_gid));
+#endif
+  copy_acl (input->in_file_name, in_fd,
+            input->out_file_name, out_fd,
+            input->st.st_mode);
+
+  ck_fclose (input->fp);
+  ck_fclose (output_file.fp);
+
+  /* Any extension other than "*" asks for a backup of the original.  */
+  if (strcmp (in_place_extension, "*") != 0)
+    {
+      char *backup_name = get_backup_file_name (orig_name);
+      ck_rename (orig_name, backup_name, input->out_file_name);
+      free (backup_name);
+    }
+
+  ck_rename (input->out_file_name, orig_name, input->out_file_name);
+  cancel_cleanup ();
+  free (input->out_file_name);
+
+  input->fp = NULL;
+}
+
+/* Put the range state of every command in VEC back to its starting value:
+   a command whose first address is the line number 0 starts out with an
+   active range, every other command starts out inactive.  */
+static void
+reset_addresses (struct vector *vec)
+{
+  struct sed_cmd *cmd = vec->v;
+  int remaining = vec->v_length;
+
+  while (remaining--)
+    {
+      bool starts_at_zero = (cmd->a1
+                             && cmd->a1->addr_type == ADDR_IS_NUM
+                             && cmd->a1->addr_number == 0);
+
+      cmd->range_state = starts_at_zero ? RANGE_ACTIVE : RANGE_INACTIVE;
+      cmd++;
+    }
+}
+
+/* Fetch the next input line into the pattern space, moving on to the
+   following input files as each one is exhausted.  When APPEND is zero the
+   pattern space is emptied first; otherwise the new text is added to it.
+   Return false once there is no more input at all, true otherwise.  */
+static bool
+read_pattern_space (struct input *input, struct vector *the_program, int append)
+{
+  /* The test duplicates one made by the callee; it only saves a call in
+     the common case of an empty queue.  */
+  if (append_head)
+    dump_append_queue ();
+
+  replaced = false;
+  if (!append)
+    line.length = 0;
+  line.chomped = true;  /* assumed until the reader says otherwise */
+
+  for (;;)
+    {
+      if ((*input->read_fn) (input))
+        break;
+
+      /* The current file is exhausted.  */
+      closedown (input);
+
+      if (*input->file_list == NULL)
+        return false;
+
+      if (input->reset_at_next_file)
+        {
+          input->line_number = 0;
+          hold.length = 0;
+          reset_addresses (the_program);
+          rewind_read_files ();
+
+          /* With in-place editing the pending new-line is never added to
+             this file; when output goes to stdout it may still have to be
+             written later, so the flag is only cleared for in-place.  */
+          if (in_place_extension)
+            output_file.missing_newline = false;
+
+          input->reset_at_next_file = separate_files;
+        }
+
+      open_next_file (*input->file_list++, input);
+    }
+
+  ++input->line_number;
+  return true;
+}
+
+/* Close the current input and walk through the remaining input files.
+   Return true when the file list runs out without finding any readable
+   byte; return false as soon as a newly opened file yields a byte (which
+   is pushed back, leaving that file open as the current input).  */
+static bool
+last_file_with_data_p (struct input *input)
+{
+  while (true)
+    {
+      closedown (input);
+      if (*input->file_list == NULL)
+        return true;
+
+      open_next_file (*input->file_list++, input);
+      if (input->fp == NULL)
+        continue;
+
+      int ch = getc (input->fp);
+      if (ch != EOF)
+        {
+          ungetc (ch, input->fp);
+          return false;
+        }
+    }
+}
+
+/* Decide whether the current line matches the `$' address, i.e. whether
+   no further input follows it.  Buffered data or a readable byte in the
+   current stream means "not last".  Otherwise the answer is true when
+   files are treated separately, or when no later file has any data.  */
+static bool
+test_eof (struct input *input)
+{
+  if (buffer.length)
+    return false;
+
+  if (input->fp && !feof (input->fp))
+    {
+      /* Peek one byte ahead and push it back if there is one.  */
+      int ch = getc (input->fp);
+      if (ch != EOF)
+        {
+          ungetc (ch, input->fp);
+          return false;
+        }
+    }
+
+  return separate_files || last_file_with_data_p (input);
+}
+
+/* Test the single address ADDR against the current line of INPUT and
+   return true when it matches.  An unknown address type is an internal
+   error and is reported through panic.  */
+static bool
+match_an_address_p (struct addr *addr, struct input *input)
+{
+  switch (addr->addr_type)
+    {
+    case ADDR_IS_NULL:
+      return true;
+
+    case ADDR_IS_NUM:
+      /* Only meaningful as a first address.  */
+      return input->line_number == addr->addr_number;
+
+    case ADDR_IS_NUM_MOD:
+      if (input->line_number < addr->addr_number)
+        return false;
+      return ((input->line_number - addr->addr_number)
+              % addr->addr_step) == 0;
+
+    case ADDR_IS_STEP:
+    case ADDR_IS_STEP_MOD:
+      /* Only meaningful as a second address; addr_number is recomputed
+         by the caller every time the first address matches.  */
+      return input->line_number >= addr->addr_number;
+
+    case ADDR_IS_LAST:
+      return test_eof (input);
+
+    case ADDR_IS_REGEX:
+      return match_regex (addr->addr_regex, line.active, line.length, 0,
+                          NULL, 0);
+
+    default:
+      panic ("INTERNAL ERROR: bad address type");
+    }
+  /*NOTREACHED*/
+  return false;
+}
+
+/* Return true if CMD's address (or address range) selects the current
+   line of INPUT.  A command without a first address selects every line.
+   For two-address commands this function also drives the range state
+   machine stored in cmd->range_state (RANGE_INACTIVE, RANGE_ACTIVE,
+   RANGE_CLOSED) and, for step addresses, recomputes a2->addr_number. */
+static bool
+match_address_p (struct sed_cmd *cmd, struct input *input)
+{
+ if (!cmd->a1)
+ return true;
+
+ if (cmd->range_state != RANGE_ACTIVE)
+ {
+ /* Single address: no range bookkeeping is needed. */
+ if (!cmd->a2)
+ return match_an_address_p (cmd->a1, input);
+
+ /* Find if we are going to activate a range. Handle ADDR_IS_NUM
+ specially: it represents an "absolute" state, it should not
+ be computed like regexes. */
+ if (cmd->a1->addr_type == ADDR_IS_NUM)
+ {
+ /* A closed numeric range is never reopened, and it cannot start
+ before its first line has been reached. */
+ if (cmd->range_state == RANGE_CLOSED
+ || input->line_number < cmd->a1->addr_number)
+ return false;
+ }
+ else
+ {
+ if (!match_an_address_p (cmd->a1, input))
+ return false;
+ }
+
+ /* Ok, start a new range. */
+ cmd->range_state = RANGE_ACTIVE;
+ switch (cmd->a2->addr_type)
+ {
+ case ADDR_IS_REGEX:
+ /* Always include at least two lines. */
+ return true;
+ case ADDR_IS_NUM:
+ /* Same handling as below, but always include at least one line. */
+ if (input->line_number >= cmd->a2->addr_number)
+ cmd->range_state = RANGE_CLOSED;
+ return (input->line_number <= cmd->a2->addr_number
+ || match_an_address_p (cmd->a1, input));
+ case ADDR_IS_STEP:
+ /* The range ends addr_step lines after the line that opened it. */
+ cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
+ return true;
+ case ADDR_IS_STEP_MOD:
+ /* The range ends at the next multiple of addr_step after this line. */
+ cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
+ - (input->line_number%cmd->a2->addr_step);
+ return true;
+ default:
+ /* Any other second-address type falls through to the generic
+ end-of-range test below. */
+ break;
+ }
+ }
+
+ /* cmd->range_state == RANGE_ACTIVE. Check if the range is
+ ending; also handle ADDR_IS_NUM specially in this case. */
+
+ if (cmd->a2->addr_type == ADDR_IS_NUM)
+ {
+ /* If the second address is a line number, and if we got past
+ that line, fail to match (it can happen when you jump
+ over such addresses with `b' and `t'). Use RANGE_CLOSED
+ so that the range is not re-enabled anymore. */
+ if (input->line_number >= cmd->a2->addr_number)
+ cmd->range_state = RANGE_CLOSED;
+
+ return (input->line_number <= cmd->a2->addr_number);
+ }
+
+ /* Other addresses are treated as usual. */
+ /* The line that matches the second address still belongs to the range,
+ hence the unconditional `true' below. */
+ if (match_an_address_p (cmd->a2, input))
+ cmd->range_state = RANGE_CLOSED;
+
+ return true;
+}
+
+/* Implement the `l' command: write the pattern space to the output file
+   in an unambiguous form.  Printable characters are copied (a backslash is
+   doubled), the usual control characters use their C escapes, and every
+   other byte is written as a three-digit octal escape.  When LINE_LEN is
+   positive, output is folded with a backslash followed by the buffer
+   delimiter once a piece would reach LINE_LEN columns.  The listing ends
+   with `$' and the buffer delimiter.  */
+static void
+do_list (int line_len)
+{
+  FILE *out = output_file.fp;
+  const unsigned char *src = (unsigned char *) line.active;
+  countT remaining = line.length;
+  countT width = 0;
+  char obuf[180];       /* far larger than any single escape needs */
+
+  output_missing_newline (&output_file);
+
+  while (remaining--)
+    {
+      const unsigned char c = *src++;
+      char *o = obuf;
+      bool printable;
+
+      /* Some locales define 8-bit characters as printable.  This makes the
+         testsuite fail at 8to7.sed because the `l' command in fact will not
+         convert the 8-bit characters.  */
+#if defined isascii || defined HAVE_ISASCII
+      printable = isascii (c) && ISPRINT (c);
+#else
+      printable = ISPRINT (c);
+#endif
+
+      if (printable)
+        {
+          *o++ = c;
+          if (c == '\\')
+            *o++ = '\\';
+        }
+      else
+        {
+          *o++ = '\\';
+          switch (c)
+            {
+#if defined __STDC__ && __STDC__-0
+            case '\a': *o++ = 'a'; break;
+#else /* Not STDC; we'll just assume ASCII */
+            case 007: *o++ = 'a'; break;
+#endif
+            case '\b': *o++ = 'b'; break;
+            case '\f': *o++ = 'f'; break;
+            case '\n': *o++ = 'n'; break;
+            case '\r': *o++ = 'r'; break;
+            case '\t': *o++ = 't'; break;
+            case '\v': *o++ = 'v'; break;
+            default:
+              o += sprintf (o, "%03o", c);
+              break;
+            }
+        }
+
+      size_t olen = o - obuf;
+      if (line_len > 0 && width + olen >= line_len)
+        {
+          /* Fold: backslash, delimiter, and restart the column count.  */
+          ck_fwrite ("\\", 1, 1, out);
+          ck_fwrite (&buffer_delimiter, 1, 1, out);
+          width = 0;
+        }
+
+      ck_fwrite (obuf, 1, olen, out);
+      width += olen;
+    }
+
+  ck_fwrite ("$", 1, 1, out);
+  ck_fwrite (&buffer_delimiter, 1, 1, out);
+  flush_output (out);
+}
+
+
+/* Expand the replacement list P into BUF.  Each element contributes its
+   literal prefix followed by the text of the sub-match it refers to, taken
+   from the pattern space at the offsets recorded in REGS.  Case modifiers
+   are passed on to str_append_modified; a modifier attached to an empty
+   sub-match is remembered and applied to the next element instead.  */
+static void append_replacement (struct line *buf, struct replacement *p,
+                                struct re_registers *regs)
+{
+  enum replacement_types pending = 0;
+
+  while (p != NULL)
+    {
+      const int id = p->subst_id;
+      enum replacement_types type = p->repl_type;
+
+      /* A modifier carried over from an earlier element applies only when
+         this element does not bring a modifier of its own.  */
+      if (!(type & REPL_MODIFIERS))
+        type |= pending;
+      pending = 0;
+
+      if (p->prefix_length)
+        {
+          str_append_modified (buf, p->prefix, p->prefix_length, type);
+          type &= ~REPL_MODIFIERS;
+        }
+
+      if (0 <= id && id < regs->num_regs)
+        {
+          const bool empty_match = (regs->end[id] == regs->start[id]);
+
+          if (!empty_match)
+            str_append_modified (buf, line.active + regs->start[id],
+                                 (size_t) (regs->end[id] - regs->start[id]),
+                                 type);
+          else if (p->repl_type & REPL_MODIFIERS)
+            /* Keep the modifier for later, e.g. in s/()([a-z])/\u\1\2/
+               the \u modifier is applied to \2, not \1.  */
+            pending = type & REPL_MODIFIERS;
+        }
+
+      p = p->next;
+    }
+}
+
+/* Execute the `s' command described by SUB on the pattern space (the
+   global `line').
+
+   Matches of SUB->regx are searched from left to right.  Matches before
+   the SUB->numb'th one are copied unchanged; from that match on, the
+   replacement is expanded, once or for every further match when
+   SUB->global is set.  The result is built in `s_accum' and then exchanged
+   with `line'.  The global `replaced' flag is set when a substitution was
+   made.
+
+   After a successful substitution the flags are honoured in this order:
+   bit 1 of SUB->print prints the pattern space, SUB->eval runs the pattern
+   space as a command through popen and replaces it with the command's
+   output, bit 2 of SUB->print prints the pattern space, and SUB->outf
+   receives a copy.  */
+static void
+do_subst (struct subst *sub)
+{
+  size_t start = 0;     /* where to start scan for (next) match in LINE */
+  size_t last_end = 0;  /* where did the last successful match end in LINE */
+  countT count = 0;     /* number of matches found */
+  bool again = true;
+
+  static struct re_registers regs;
+
+  line_reset (&s_accum, &line);
+
+  /* The first part of the loop optimizes s/xxx// when xxx is at the
+     start, and s/xxx$// */
+  if (!match_regex (sub->regx, line.active, line.length, start,
+                    &regs, sub->max_id + 1))
+    return;
+
+  if (debug)
+    {
+      if (regs.num_regs>0 && regs.start[0] != -1)
+        puts ("MATCHED REGEX REGISTERS");
+
+      for (int i = 0; i < regs.num_regs; ++i)
+        {
+          if (regs.start[i] == -1)
+            break;
+
+          printf (" regex[%d] = %d-%d '", i,
+                  (int)regs.start[i], (int)regs.end[i]);
+
+          if (regs.start[i] != regs.end[i])
+            fwrite (line.active + regs.start[i], regs.end[i] -regs.start[i],
+                    1, stdout);
+
+          puts ("'");
+        }
+    }
+
+  /* Deleting a match at the very start or the very end of the line needs
+     no copying: adjust the bounds of the pattern space in place.  */
+  if (!sub->replacement && sub->numb <= 1)
+    {
+      if (regs.start[0] == 0 && !sub->global)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          line.active += regs.end[0];
+          line.length -= regs.end[0];
+          line.alloc -= regs.end[0];
+          goto post_subst;
+        }
+      else if (regs.end[0] == line.length)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          line.length = regs.start[0];
+          goto post_subst;
+        }
+    }
+
+  do
+    {
+      size_t offset = regs.start[0];
+      size_t matched = regs.end[0] - regs.start[0];
+
+      /* Copy stuff to the left of this match into the output string. */
+      if (start < offset)
+        {
+          str_append (&s_accum, line.active + start, offset - start);
+          start = offset;
+        }
+
+      /* If we're counting up to the Nth match, are we there yet?
+         And even if we are there, there is another case we have to
+         skip: are we matching an empty string immediately following
+         another match?
+
+         This latter case avoids that baaaac, when passed through
+         s,a*,x,g, gives `xbxxcx' instead of xbxcx.  This behavior is
+         unacceptable because it is not consistently applied (for
+         example, `baaaa' gives `xbx', not `xbxx'). */
+      if ((matched > 0 || count == 0 || offset > last_end)
+          && ++count >= sub->numb)
+        {
+          /* We found a match, set the `replaced' flag. */
+          replaced = true;
+
+          /* Now expand the replacement string into the output string. */
+          append_replacement (&s_accum, sub->replacement, &regs);
+          again = sub->global;
+        }
+      else
+        {
+          /* The match was not replaced.  Copy the text until its
+             end; if it was vacuous, skip over one character and
+             add that character to the output. */
+          if (matched == 0)
+            {
+              if (start < line.length)
+                matched = 1;
+              else
+                break;
+            }
+
+          str_append (&s_accum, line.active + offset, matched);
+        }
+
+      /* Start after the match.  last_end is the real end of the matched
+         substring, excluding characters that were skipped in case the RE
+         matched the empty string. */
+      start = offset + matched;
+      last_end = regs.end[0];
+    }
+  while (again
+         && start <= line.length
+         && match_regex (sub->regx, line.active, line.length, start,
+                         &regs, sub->max_id + 1));
+
+  /* Copy stuff to the right of the last match into the output string. */
+  if (start < line.length)
+    str_append (&s_accum, line.active + start, line.length-start);
+  s_accum.chomped = line.chomped;
+
+  /* Exchange line and s_accum.  This can be much cheaper
+     than copying s_accum.active into line.text (for huge lines). */
+  line_exchange (&line, &s_accum, false);
+
+  /* Finish up: fewer than SUB->numb matches means nothing was replaced,
+     so none of the flags below apply. */
+  if (count < sub->numb)
+    return;
+
+ post_subst:
+  if (sub->print & 1)
+    output_line (line.active, line.length, line.chomped, &output_file);
+
+  if (sub->eval)
+    {
+#ifdef HAVE_POPEN
+      FILE *pipe_fp;
+      line_reset (&s_accum, NULL);
+
+      /* NUL-terminate the pattern space so it can be used as a command. */
+      str_append (&line, "", 1);
+      pipe_fp = popen (line.active, "r");
+
+      if (pipe_fp != NULL)
+        {
+          char buf[4096];
+          size_t n;
+
+          /* Loop on fread's result rather than on feof(): a read error
+             makes fread return 0 without setting the end-of-file
+             indicator, which would keep a feof()-driven loop spinning
+             forever. */
+          while ((n = fread (buf, sizeof (char), sizeof buf, pipe_fp)) > 0)
+            str_append (&s_accum, buf, n);
+
+          pclose (pipe_fp);
+
+          /* Exchange line and s_accum.  This can be much cheaper than
+             copying s_accum.active into line.text (for huge lines).  See
+             the comment on the `g' command in execute_program as to why
+             the third argument is incorrect anyway. */
+          line_exchange (&line, &s_accum, true);
+          if (line.length
+              && line.active[line.length - 1] == buffer_delimiter)
+            line.length--;
+        }
+      else
+        panic (_("error in subprocess"));
+#else
+      panic (_("option `e' not supported"));
+#endif
+    }
+
+  if (sub->print & 2)
+    output_line (line.active, line.length, line.chomped, &output_file);
+  if (sub->outf)
+    output_line (line.active, line.length, line.chomped, sub->outf);
+}
+
+/* Translate the global input LINE via TRANS.
+   This function handles the multi-byte case.
+
+   TRANS is a NULL-terminated array of string pairs: trans[2*i] is a
+   source character and trans[2*i+1] its replacement.  The line is scanned
+   one (possibly multibyte) character at a time; the first pair whose
+   source matches the character is applied.  Because a replacement may be
+   shorter or longer than the character it replaces, the tail of the line
+   is moved (and the buffer grown) as needed. */
+static void
+translate_mb (char *const *trans)
+{
+  size_t idx; /* index in the input line. */
+  mbstate_t mbstate = { 0, };
+  for (idx = 0; idx < line.length;)
+    {
+      unsigned int i;
+      size_t mbclen = MBRLEN (line.active + idx,
+                              line.length - idx, &mbstate);
+      /* An invalid sequence, or a truncated multibyte
+         character.  Treat it as a single-byte character. */
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2)
+        {
+          /* After an invalid sequence the conversion state is unspecified,
+             and after a truncated one it still holds the partial
+             character.  Start the next character from the initial state,
+             as is_mb_char does for invalid bytes. */
+          memset (&mbstate, 0, sizeof mbstate);
+          mbclen = 1;
+        }
+      else if (mbclen == 0)
+        mbclen = 1;     /* the NUL character occupies one byte */
+
+      /* `i' indicate i-th translate pair. */
+      for (i = 0; trans[2*i] != NULL; i++)
+        {
+          if (STREQ_LEN (line.active + idx, trans[2*i], mbclen))
+            {
+              bool move_remain_buffer = false;
+              const char *tr = trans[2*i+1];
+              /* An empty replacement string counts as one byte. */
+              size_t trans_len = *tr == '\0' ? 1 : strlen (tr);
+
+              if (mbclen < trans_len)
+                {
+                  size_t new_len = (line.length + 1
+                                    + trans_len - mbclen);
+                  /* We must extend the line buffer. */
+                  if (line.alloc < new_len)
+                    {
+                      /* And we must resize the buffer. */
+                      resize_line (&line, new_len);
+                    }
+                  move_remain_buffer = true;
+                }
+              else if (mbclen > trans_len)
+                {
+                  /* We must truncate the line buffer. */
+                  move_remain_buffer = true;
+                }
+              size_t prev_idx = idx;
+              if (move_remain_buffer)
+                {
+                  /* Move the remaining with \0. */
+                  char const *move_from = (line.active + idx + mbclen);
+                  char *move_to = line.active + idx + trans_len;
+                  size_t move_len = line.length + 1 - idx - mbclen;
+                  /* Unsigned difference: wraps when the replacement is
+                     shorter, which the additions below undo. */
+                  size_t move_offset = trans_len - mbclen;
+                  memmove (move_to, move_from, move_len);
+                  line.length += move_offset;
+                  idx += move_offset;
+                }
+              memcpy (line.active + prev_idx, trans[2*i+1],
+                      trans_len);
+              break;
+            }
+        }
+      idx += mbclen;
+    }
+}
+
+/* Debug output: write the end-of-cycle marker line to stdout.  */
+static void
+debug_print_end_of_cycle (void)
+{
+  fputs ("END-OF-CYCLE:\n", stdout);
+}
+
+/* Debug output: print which input the current line came from and its
+   line number.  An input whose stream has file descriptor 0 is shown as
+   STDIN; any other input is shown by its file name.  */
+static void
+debug_print_input (const struct input *input)
+{
+  const bool is_stdin = (input->fp && fileno (input->fp) == 0);
+  const char *shown_name = is_stdin ? "STDIN" : input->in_file_name;
+
+  /* Cast explicitly so the argument always matches %lu, as the `='
+     command does when it prints the same counter.  */
+  printf ("INPUT: '%s' line %lu\n",
+          shown_name,
+          (unsigned long) input->line_number);
+}
+
+/* Debug output: print the contents of LN on one line of stdout, labelled
+   as the hold space when LN is `hold' and as the pattern space otherwise.
+   Each byte goes through debug_print_char.  */
+static void
+debug_print_line (struct line *ln)
+{
+  const char *cursor = ln->active ? ln->active : ln->text;
+  const char *label = (ln == &hold) ? "HOLD: " : "PATTERN: ";
+
+  fputs (label, stdout);
+  for (size_t left = ln->length; left > 0; left--)
+    debug_print_char (*cursor++);
+  putchar ('\n');
+}
+
+/* Execute the program `vec' on the current input line.
+   Return exit status if caller should quit, -1 otherwise.
+
+   Commands are visited in order; a command runs when the result of its
+   address test differs from its `addr_bang' flag.  Branching commands
+   update the command pointer and `continue', skipping the increment at the
+   bottom of the loop.  When the end of the program is reached the pattern
+   space is printed unless no_default_output is set.  INPUT is used by the
+   commands that read further lines (`n', `N') or report on the input
+   (`=', `F'). */
+static int
+execute_program (struct vector *vec, struct input *input)
+{
+  struct sed_cmd *cur_cmd;
+  struct sed_cmd *end_cmd;
+
+  cur_cmd = vec->v;
+  end_cmd = vec->v + vec->v_length;
+  while (cur_cmd < end_cmd)
+    {
+      if (debug)
+        {
+          fputs ("COMMAND: ", stdout);
+          debug_print_command (vec, cur_cmd);
+        }
+
+      if (match_address_p (cur_cmd, input) != cur_cmd->addr_bang)
+        {
+          switch (cur_cmd->cmd)
+            {
+            case 'a':
+              {
+                struct append_queue *aq = next_append_slot ();
+                aq->text = cur_cmd->x.cmd_txt.text;
+                aq->textlen = cur_cmd->x.cmd_txt.text_length;
+              }
+              break;
+
+            case '{':
+            case 'b':
+              cur_cmd = vec->v + cur_cmd->x.jump_index;
+              continue;
+
+            case '}':
+            case '#':
+            case ':':
+              /* Executing labels and block-ends are easy. */
+              break;
+
+            case 'c':
+              if (cur_cmd->range_state != RANGE_ACTIVE)
+                output_line (cur_cmd->x.cmd_txt.text,
+                             cur_cmd->x.cmd_txt.text_length - 1, true,
+                             &output_file);
+              /* POSIX.2 is silent about c starting a new cycle,
+                 but it seems to be expected (and make sense). */
+              FALLTHROUGH;
+            case 'd':
+              if (debug)
+                debug_print_end_of_cycle ();
+              return -1;
+
+            case 'D':
+              {
+                char *p = memchr (line.active, buffer_delimiter, line.length);
+                if (!p)
+                  return -1;
+
+                /* Drop everything up to and including the first
+                   delimiter. */
+                ++p;
+                line.alloc -= p - line.active;
+                line.length -= p - line.active;
+                line.active += p - line.active;
+
+                /* reset to start next cycle without reading a new line: */
+                cur_cmd = vec->v;
+
+                if (debug)
+                  debug_print_line (&line);
+                continue;
+              }
+
+            case 'e': {
+#ifndef HAVE_POPEN
+              panic (_("`e' command not supported"));
+#else
+              FILE *pipe_fp;
+              int cmd_length = cur_cmd->x.cmd_txt.text_length;
+              line_reset (&s_accum, NULL);
+
+              if (!cmd_length)
+                {
+                  /* Plain `e': run the NUL-terminated pattern space. */
+                  str_append (&line, "", 1);
+                  pipe_fp = popen (line.active, "r");
+                }
+              else
+                {
+                  /* `e command': run the text given with the command. */
+                  cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
+                  pipe_fp = popen (cur_cmd->x.cmd_txt.text, "r");
+                  output_missing_newline (&output_file);
+                }
+
+              if (pipe_fp == NULL)
+                panic (_("error in subprocess"));
+
+              {
+                char buf[4096];
+                size_t n;
+
+                /* Loop on fread's result rather than on feof(): a read
+                   error makes fread return 0 without setting the
+                   end-of-file indicator, which would keep a feof()-driven
+                   loop spinning forever. */
+                while ((n = fread (buf, sizeof (char), sizeof buf,
+                                   pipe_fp)) > 0)
+                  {
+                    if (!cmd_length)
+                      str_append (&s_accum, buf, n);
+                    else
+                      ck_fwrite (buf, 1, n, output_file.fp);
+                  }
+
+                pclose (pipe_fp);
+                if (!cmd_length)
+                  {
+                    /* Store into pattern space for plain `e' commands */
+                    if (s_accum.length
+                        && (s_accum.active[s_accum.length - 1]
+                            == buffer_delimiter))
+                      s_accum.length--;
+
+                    /* Exchange line and s_accum.  This can be much
+                       cheaper than copying s_accum.active into line.text
+                       (for huge lines).  See comment below for 'g' as
+                       to why the third argument is incorrect anyway. */
+                    line_exchange (&line, &s_accum, true);
+                  }
+                else
+                  flush_output (output_file.fp);
+              }
+#endif
+              break;
+            }
+
+            case 'g':
+              /* We do not have a really good choice for the third parameter.
+                 The problem is that hold space and the input file might as
+                 well have different states; copying it from hold space means
+                 that subsequent input might be read incorrectly, while
+                 keeping it as in pattern space means that commands operating
+                 on the moved buffer might consider a wrong character set.
+                 We keep it true because it's what sed <= 4.1.5 did. */
+              line_copy (&hold, &line, true);
+              if (debug)
+                debug_print_line (&hold);
+              break;
+
+            case 'G':
+              /* We do not have a really good choice for the third parameter.
+                 The problem is that hold space and pattern space might as
+                 well have different states.  So, true is as wrong as false.
+                 We keep it true because it's what sed <= 4.1.5 did, but
+                 we could consider having line_ap. */
+              line_append (&hold, &line, true);
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case 'h':
+              /* Here, it is ok to have true. */
+              line_copy (&line, &hold, true);
+              if (debug)
+                debug_print_line (&hold);
+              break;
+
+            case 'H':
+              /* See comment above for 'G' regarding the third parameter. */
+              line_append (&line, &hold, true);
+              if (debug)
+                debug_print_line (&hold);
+              break;
+
+            case 'i':
+              output_line (cur_cmd->x.cmd_txt.text,
+                           cur_cmd->x.cmd_txt.text_length - 1,
+                           true, &output_file);
+              break;
+
+            case 'l':
+              /* An argument of -1 selects the default line length. */
+              do_list (cur_cmd->x.int_arg == -1
+                       ? lcmd_out_line_len
+                       : cur_cmd->x.int_arg);
+              break;
+
+            case 'n':
+              if (!no_default_output)
+                output_line (line.active, line.length, line.chomped,
+                             &output_file);
+              if (test_eof (input) || !read_pattern_space (input, vec, false))
+                {
+                  if (debug)
+                    debug_print_end_of_cycle ();
+                  return -1;
+                }
+
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case 'N':
+              str_append (&line, &buffer_delimiter, 1);
+
+              if (test_eof (input) || !read_pattern_space (input, vec, true))
+                {
+                  if (debug)
+                    debug_print_end_of_cycle ();
+                  /* No next line: take back the delimiter added above. */
+                  line.length--;
+                  if (posixicity == POSIXLY_EXTENDED && !no_default_output)
+                    output_line (line.active, line.length, line.chomped,
+                                 &output_file);
+                  return -1;
+                }
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case 'p':
+              output_line (line.active, line.length, line.chomped,
+                           &output_file);
+              break;
+
+            case 'P':
+              {
+                /* Print up to, but not including, the first delimiter. */
+                char *p = memchr (line.active, buffer_delimiter, line.length);
+                output_line (line.active, p ? p - line.active : line.length,
+                             p ? true : line.chomped, &output_file);
+              }
+              break;
+
+            case 'q':
+              if (!no_default_output)
+                output_line (line.active, line.length, line.chomped,
+                             &output_file);
+              dump_append_queue ();
+              FALLTHROUGH;
+
+            case 'Q':
+              /* An argument of -1 means no exit code was given. */
+              return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
+
+            case 'r':
+              if (cur_cmd->x.fname)
+                {
+                  struct append_queue *aq = next_append_slot ();
+                  aq->fname = cur_cmd->x.fname;
+                }
+              break;
+
+            case 'R':
+              if (cur_cmd->x.inf->fp && !feof (cur_cmd->x.inf->fp))
+                {
+                  struct append_queue *aq;
+                  size_t buflen;
+                  char *text = NULL;
+                  int result;
+
+                  result = ck_getdelim (&text, &buflen, buffer_delimiter,
+                                        cur_cmd->x.inf->fp);
+                  if (result != EOF)
+                    {
+                      /* The queue entry takes ownership of `text'. */
+                      aq = next_append_slot ();
+                      aq->free = true;
+                      aq->text = text;
+                      aq->textlen = result;
+                    }
+                  else
+                    {
+                      /* The external input file (for R command) reached EOF,
+                         the 'text' buffer will not be added to the append
+                         queue so release it */
+                      free (text);
+                    }
+                }
+              break;
+
+            case 's':
+              do_subst (cur_cmd->x.cmd_subst);
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case 't':
+              if (replaced)
+                {
+                  replaced = false;
+                  cur_cmd = vec->v + cur_cmd->x.jump_index;
+                  continue;
+                }
+              break;
+
+            case 'T':
+              if (!replaced)
+                {
+                  cur_cmd = vec->v + cur_cmd->x.jump_index;
+                  continue;
+                }
+              else
+                replaced = false;
+              break;
+
+            case 'w':
+              if (cur_cmd->x.outf->fp)
+                output_line (line.active, line.length,
+                             line.chomped, cur_cmd->x.outf);
+              break;
+
+            case 'W':
+              if (cur_cmd->x.outf->fp)
+                {
+                  char *p = memchr (line.active, buffer_delimiter, line.length);
+                  output_line (line.active, p ? p - line.active : line.length,
+                               p ? true : line.chomped, cur_cmd->x.outf);
+                }
+              break;
+
+            case 'x':
+              /* See comment above for 'g' regarding the third parameter. */
+              line_exchange (&line, &hold, false);
+              if (debug)
+                {
+                  debug_print_line (&line);
+                  debug_print_line (&hold);
+                }
+              break;
+
+            case 'y':
+              if (mb_cur_max > 1)
+                translate_mb (cur_cmd->x.translatemb);
+              else
+                {
+                  /* Single-byte locale: one table lookup per byte. */
+                  unsigned char *p, *e;
+                  p = (unsigned char *)line.active;
+                  for (e=p+line.length; p<e; ++p)
+                    *p = cur_cmd->x.translate[*p];
+                }
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case 'z':
+              line.length = 0;
+              if (debug)
+                debug_print_line (&line);
+              break;
+
+            case '=':
+              output_missing_newline (&output_file);
+              fprintf (output_file.fp, "%lu%c",
+                       (unsigned long)input->line_number,
+                       buffer_delimiter);
+              flush_output (output_file.fp);
+              break;
+
+            case 'F':
+              output_missing_newline (&output_file);
+              fprintf (output_file.fp, "%s%c",
+                       input->in_file_name,
+                       buffer_delimiter);
+              flush_output (output_file.fp);
+              break;
+
+            default:
+              panic ("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
+            }
+        }
+
+      /* this is buried down here so that a "continue" statement can skip it */
+      ++cur_cmd;
+    }
+
+  if (debug)
+    debug_print_end_of_cycle ();
+  if (!no_default_output)
+    output_line (line.active, line.length, line.chomped, &output_file);
+  return -1;
+}
+
+
+/* Run the compiled script THE_PROGRAM over every file named in ARGV, or
+   over standard input ("-") when ARGV names no file.  Asking for in-place
+   editing without any file is a fatal error.  The return value is the
+   exit status requested by the script, EXIT_SUCCESS when the script did
+   not ask to quit, or EXIT_BAD_INPUT whenever input.bad_count is
+   non-zero.  */
+int
+process_files (struct vector *the_program, char **argv)
+{
+  static char dash[] = "-";
+  static char *stdin_argv[2] = { dash, NULL };
+  struct input input;
+  int status = EXIT_SUCCESS;
+
+  line_init (&line, NULL, INITIAL_BUFFER_SIZE);
+  line_init (&hold, NULL, 0);
+  line_init (&buffer, NULL, 0);
+
+  input.bad_count = 0;
+  input.line_number = 0;
+  input.read_fn = read_always_fail;
+  input.fp = NULL;
+  input.reset_at_next_file = true;
+
+  if (argv != NULL && *argv != NULL)
+    input.file_list = argv;
+  else if (in_place_extension)
+    panic (_("no input files"));
+  else
+    input.file_list = stdin_argv;
+
+  while (read_pattern_space (&input, the_program, false))
+    {
+      if (debug)
+        {
+          debug_print_input (&input);
+          debug_print_line (&line);
+        }
+
+      /* -1 means "carry on with the next cycle"; any other value is the
+         exit status the script asked for.  */
+      int rc = execute_program (the_program, &input);
+      if (rc != -1)
+        {
+          status = rc;
+          break;
+        }
+    }
+  closedown (&input);
+
+#ifdef lint
+  /* The process is about to exit, so freeing is not needed for
+     correctness; it only keeps leak-detecting allocators quiet.  */
+  release_append_queue ();
+  free (buffer.text);
+  free (hold.text);
+  free (line.text);
+  free (s_accum.text);
+#endif /* lint */
+
+  if (input.bad_count)
+    status = EXIT_BAD_INPUT;
+
+  return status;
+}
diff --git a/sed/local.mk b/sed/local.mk
new file mode 100644
index 0000000..5ccaaf4
--- /dev/null
+++ b/sed/local.mk
@@ -0,0 +1,58 @@
+# Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+# The sed program itself.
+bin_PROGRAMS += sed/sed
+
+localedir = $(datadir)/locale
+
+# C sources compiled into sed/sed.
+sed_sed_SOURCES = \
+ sed/compile.c \
+ sed/debug.c \
+ sed/execute.c \
+ sed/mbcs.c \
+ sed/regexp.c \
+ sed/sed.c \
+ sed/utils.c
+
+noinst_HEADERS += \
+ sed/sed.h \
+ sed/utils.h
+
+# LOCALEDIR is passed to the sources as a C string literal.
+sed_sed_CPPFLAGS = $(AM_CPPFLAGS) -DLOCALEDIR=\"$(localedir)\"
+sed_sed_CFLAGS = $(AM_CFLAGS) $(WARN_CFLAGS) $(WERROR_CFLAGS)
+sed_sed_LDADD = sed/libver.a lib/libsed.a $(INTLLIBS) $(LIB_ACL) $(LIB_SELINUX)
+sed_sed_DEPENDENCIES = lib/libsed.a sed/libver.a
+
+# Every object file depends on the generated sources listed in BUILT_SOURCES.
+$(sed_sed_OBJECTS): $(BUILT_SOURCES)
+
+# Generate sed/version.c, which defines `Version' as the package version.
+# The file is written under the temporary name $@t, made read-only, and
+# only then renamed to its final name.
+BUILT_SOURCES += sed/version.c
+DISTCLEANFILES += sed/version.c
+sed/version.c: Makefile
+ $(AM_V_GEN)rm -f $@
+ $(AM_V_at)printf '#include <config.h>\n' > $@t
+ $(AM_V_at)printf 'char const *Version = "$(PACKAGE_VERSION)";\n' >> $@t
+ $(AM_V_at)chmod a-w $@t
+ $(AM_V_at)mv $@t $@
+
+# Generate sed/version.h, the matching declaration of `Version', using the
+# same write-then-rename sequence.
+BUILT_SOURCES += sed/version.h
+DISTCLEANFILES += sed/version.h
+sed/version.h: Makefile
+ $(AM_V_GEN)rm -f $@
+ $(AM_V_at)printf 'extern char const *Version;\n' > $@t
+ $(AM_V_at)chmod a-w $@t
+ $(AM_V_at)mv $@t $@
+
+# The generated version files are built into a small library that sed/sed
+# links against (see sed_sed_LDADD above).
+noinst_LIBRARIES += sed/libver.a
+nodist_sed_libver_a_SOURCES = sed/version.c sed/version.h
diff --git a/sed/mbcs.c b/sed/mbcs.c
new file mode 100644
index 0000000..c94b219
--- /dev/null
+++ b/sed/mbcs.c
@@ -0,0 +1,76 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 2003-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include "sed.h"
+#include <stdlib.h>
+#include <string.h>
+
+#include "localcharset.h"
+
+int mb_cur_max;
+bool is_utf8;
+
+/* Feed the single byte CH to mbrtowc using the conversion state CUR_STAT
+   and classify it.
+
+   Return non-zero if CH is part of a valid multibyte sequence:
+   either an incomplete yet valid sequence (a leading or middle byte),
+   or the last byte of a valid multibyte sequence.  The code also returns
+   non-zero for the NUL byte.
+
+   Return zero if:
+   CH is a valid single-byte character (e.g. 0x01-0x7F in UTF-8 locales);
+   CH is an invalid byte in a multibyte sequence for the current locale.
+
+   CUR_STAT is reset to the initial state in the case of an invalid byte.
+
+   NOTE(review): earlier documentation listed the NUL byte among the
+   zero-returning cases, while the code returns 1 for it.  The behavior is
+   left unchanged here; confirm which one is intended.
+*/
+int
+is_mb_char (int ch, mbstate_t *cur_stat)
+{
+  const char c = ch ;
+  const int mb_pending = !mbsinit (cur_stat);
+  /* Keep the result as size_t: the error returns of mbrtowc are
+     (size_t) -1 and (size_t) -2, and narrowing them to int is
+     implementation-defined.  */
+  const size_t result = mbrtowc (NULL, &c, 1, cur_stat);
+
+  switch (result)
+    {
+    case (size_t) -2: /* Beginning or middle of valid multibyte sequence */
+      return 1;
+
+    case (size_t) -1: /* Invalid sequence, byte treated like a single-byte
+                         character */
+      memset (cur_stat, 0, sizeof (mbstate_t));
+      return 0;
+
+    case 1: /* A valid byte, check if part of on-going multibyte sequence */
+      return mb_pending;
+
+    case 0: /* Special case of mbrtowc(3): the NUL character */
+      /* TODO: test this */
+      return 1;
+
+    default: /* Should never happen, as per mbrtowc(3) documentation */
+      panic ("is_mb_char: mbrtowc (0x%x) returned %d",
+             (unsigned int) ch, (int) result);
+    }
+}
+
+/* Initialize the multibyte globals from the current locale: `is_utf8'
+   tells whether the locale's character set is named "UTF-8", and
+   `mb_cur_max' caches MB_CUR_MAX.  */
+void
+initialize_mbcs (void)
+{
+  /* For UTF-8, we know that the encoding is stateless.  */
+  is_utf8 = (strcmp (locale_charset (), "UTF-8") == 0);
+
+  mb_cur_max = MB_CUR_MAX;
+}
diff --git a/sed/regexp.c b/sed/regexp.c
new file mode 100644
index 0000000..4ac06d6
--- /dev/null
+++ b/sed/regexp.c
@@ -0,0 +1,379 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1999-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include "sed.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "xalloc.h"
+
+#ifdef gettext_noop
+# define N_(String) gettext_noop(String)
+#else
+# define N_(String) (String)
+#endif
+
+extern bool use_extended_syntax_p;
+
+static const char errors[] =
+ "no previous regular expression\0"
+ "cannot specify modifiers on empty regexp";
+
+#define NO_REGEX (errors)
+#define BAD_MODIF (NO_REGEX + sizeof(N_("no previous regular expression")))
+
+
+/* Report MESG as a fatal error through panic ().
+   NOTE(review): presumably this is the error hook that the dfa module
+   (dfa.h) expects its client to define - confirm against dfa.h.  */
+void
+dfaerror (char const *mesg)
+{
+ panic ("%s", mesg);
+}
+
+/* Handle the warning MESG: it is escalated to dfaerror () unless the
+   POSIXLY_CORRECT environment variable is set, in which case it is
+   silently ignored.  */
+void
+dfawarn (char const *mesg)
+{
+  const char *posix_env = getenv ("POSIXLY_CORRECT");
+
+  if (posix_env == NULL)
+    dfaerror (mesg);
+}
+
+
+/* Compile the pattern text stored in NEW_REGEX->re (NEW_REGEX->sz bytes)
+   into both a regex pattern buffer (NEW_REGEX->pattern) and a DFA
+   (NEW_REGEX->dfa), honouring NEW_REGEX->flags and the global settings
+   extended_regexp_flags, posixicity and buffer_delimiter.
+
+   NEEDED_SUB is zero when the caller needs no sub-match information, in
+   which case the pattern is compiled with RE_NO_SUB.  Otherwise it is
+   one more than the highest back-reference the caller will use.
+
+   Compilation errors are reported through bad_prog ().  */
+static void
+compile_regex_1 (struct regex *new_regex, int needed_sub)
+{
+  const char *error;
+  int syntax = ((extended_regexp_flags & REG_EXTENDED)
+                ? RE_SYNTAX_POSIX_EXTENDED
+                : RE_SYNTAX_POSIX_BASIC);
+
+  syntax &= ~RE_DOT_NOT_NULL;
+  syntax |= RE_NO_POSIX_BACKTRACKING;
+
+  /* Adjust the syntax bits for the requested level of POSIX strictness.  */
+  switch (posixicity)
+    {
+    case POSIXLY_EXTENDED:
+      syntax &= ~RE_UNMATCHED_RIGHT_PAREN_ORD;
+      break;
+    case POSIXLY_CORRECT:
+      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD;
+      break;
+    case POSIXLY_BASIC:
+      syntax |= RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS;
+      if (!(extended_regexp_flags & REG_EXTENDED))
+        syntax |= RE_LIMITED_OPS;
+      break;
+    }
+
+  /* A fastmap is only requested for case-sensitive patterns.
+     NOTE(review): the malloc result is not checked; presumably the regex
+     library treats a NULL fastmap as "no fastmap" - confirm.  */
+  if (new_regex->flags & REG_ICASE)
+    syntax |= RE_ICASE;
+  else
+    new_regex->pattern.fastmap = malloc (1 << (sizeof (char) * 8));
+  syntax |= needed_sub ? 0 : RE_NO_SUB;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (new_regex->flags & REG_NEWLINE)
+    {
+      /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+    }
+
+  re_set_syntax (syntax);
+  error = re_compile_pattern (new_regex->re, new_regex->sz,
+                              &new_regex->pattern);
+  new_regex->pattern.newline_anchor =
+    buffer_delimiter == '\n' && (new_regex->flags & REG_NEWLINE) != 0;
+
+  new_regex->pattern.translate = NULL;
+#ifndef RE_ICASE
+  /* Without RE_ICASE support, fold case through a translation table.  */
+  if (new_regex->flags & REG_ICASE)
+    {
+      static char translate[1 << (sizeof (char) * 8)];
+      int i;
+      for (i = 0; i < sizeof (translate) / sizeof (char); i++)
+        translate[i] = tolower (i);
+
+      new_regex->pattern.translate = translate;
+    }
+#endif
+
+  if (error)
+    bad_prog (error);
+
+  /* Just to be sure, I mark this as not POSIXLY_CORRECT behavior */
+  if (needed_sub
+      && new_regex->pattern.re_nsub < needed_sub - 1
+      && posixicity == POSIXLY_EXTENDED)
+    {
+      char buf[200];
+      /* The format is translated and may be longer than the English
+         text, so bound the write to the buffer size.  */
+      snprintf (buf, sizeof buf,
+                _("invalid reference \\%d on `s' command's RHS"),
+                needed_sub - 1);
+      bad_prog (buf);
+    }
+
+  /* Build the DFA from the same pattern text and syntax bits.  */
+  int dfaopts = buffer_delimiter == '\n' ? 0 : DFA_EOL_NUL;
+  new_regex->dfa = dfaalloc ();
+  dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts);
+  dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1);
+
+  /* The patterns which consist of only ^ or $ often appear in
+     substitution, but regex and dfa are not good at them, as regex does
+     not build fastmap, and as all in buffer must be scanned for $.  So
+     we mark them to handle manually.  */
+  if (new_regex->sz == 1)
+    {
+      if (new_regex->re[0] == '^')
+        new_regex->begline = true;
+      if (new_regex->re[0] == '$')
+        new_regex->endline = true;
+    }
+}
+
+/* Build a compiled regex from the pattern text held in buffer B.
+
+   FLAGS is stored as the regex's flags and NEEDED_SUB is passed on to
+   compile_regex_1 ().  An empty B yields NULL (the caller then matches
+   against the last regex used); giving modifiers (FLAGS > 0) with an
+   empty pattern is reported through bad_prog ().  */
+struct regex *
+compile_regex (struct buffer *b, int flags, int needed_sub)
+{
+  const size_t re_len = size_buffer (b);
+  struct regex *new_regex;
+
+  /* // matches the last RE */
+  if (re_len == 0)
+    {
+      if (flags > 0)
+        bad_prog (_(BAD_MODIF));
+      return NULL;
+    }
+
+  /* struct regex already holds one byte of `re', hence the "- 1".  */
+  new_regex = xzalloc (sizeof *new_regex + re_len - 1);
+  memcpy (new_regex->re, get_buffer (b), re_len);
+  new_regex->flags = flags;
+
+  /* GNU regex does not process \t & co. */
+  new_regex->sz = normalize_text (new_regex->re, re_len, TEXT_REGEX);
+
+  compile_regex_1 (new_regex, needed_sub);
+  return new_regex;
+}
+
+/* Search BUF (BUFLEN bytes), starting at offset BUF_START_OFFSET, for a
+   match of REGEX.  A NULL REGEX means "use the regex given to the
+   previous call"; bad_prog () is called if there was none.
+
+   When REGSIZE is non-zero, match offsets (relative to BUF) are stored
+   in REGARRAY, and a regex that was compiled without sub-match support
+   is recompiled first.  When REGSIZE is zero REGARRAY is not used.
+
+   Return 1 if a match was found and 0 otherwise.  panic () is called
+   if BUFLEN is INT_MAX or more.  */
+int
+match_regex (struct regex *regex, char *buf, size_t buflen,
+ size_t buf_start_offset, struct re_registers *regarray,
+ int regsize)
+{
+ int ret;
+ static struct regex *regex_last;
+
+ /* printf ("Matching from %d/%d\n", buf_start_offset, buflen); */
+
+ /* Keep track of the last regexp matched. */
+ if (!regex)
+ {
+ regex = regex_last;
+ if (!regex_last)
+ bad_prog (_(NO_REGEX));
+ }
+ else
+ regex_last = regex;
+
+ /* gnulib's re_search uses signed-int as length */
+ if (buflen >= INT_MAX)
+ panic (_("regex input buffer length larger than INT_MAX"));
+
+ /* The pattern was compiled without sub-match support, but the caller
+ now wants registers: compile it again with REGSIZE as needed_sub. */
+ if (regex->pattern.no_sub && regsize)
+ {
+ /* Re-compiling an existing regex, free the previously allocated
+ structures. */
+ if (regex->dfa)
+ {
+ dfafree (regex->dfa);
+ free (regex->dfa);
+ regex->dfa = NULL;
+ }
+ regfree (&regex->pattern);
+
+ compile_regex_1 (regex, regsize);
+ }
+
+ regex->pattern.regs_allocated = REGS_REALLOCATE;
+
+ /* Optimized handling for '^' and '$' patterns */
+ if (regex->begline || regex->endline)
+ {
+ /* Position of the (empty) match, relative to BUF. */
+ size_t offset;
+
+ if (regex->endline)
+ {
+ const char *p = NULL;
+
+ if (regex->flags & REG_NEWLINE)
+ p = memchr (buf + buf_start_offset, buffer_delimiter,
+ buflen - buf_start_offset);
+
+ offset = p ? p - buf : buflen;
+ }
+ else if (buf_start_offset == 0)
+ /* begline anchor, starting at beginning of the buffer. */
+ offset = 0;
+ else if (!(regex->flags & REG_NEWLINE))
+ /* begline anchor, starting in the middle of the text buffer,
+ and multiline regex is not specified - will never match.
+ Example: seq 2 | sed 'N;s/^/X/g' */
+ return 0;
+ else if (buf[buf_start_offset - 1] == buffer_delimiter)
+ /* begline anchor, starting in the middle of the text buffer,
+ with multiline match, and the current character
+ is the line delimiter - start here.
+ Example: seq 2 | sed 'N;s/^/X/mg' */
+ offset = buf_start_offset;
+ else
+ {
+ /* begline anchor, starting in the middle of the search buffer,
+ all previous optimizations didn't work: search
+ for the next line delimiter character in the buffer,
+ and start from there if found. */
+ const char *p = memchr (buf + buf_start_offset, buffer_delimiter,
+ buflen - buf_start_offset);
+
+ if (p == NULL)
+ return 0;
+
+ offset = p - buf + 1;
+ }
+
+ if (regsize)
+ {
+ size_t i;
+
+ /* No register storage yet: allocate room for register 0 only. */
+ if (!regarray->start)
+ {
+ regarray->start = XCALLOC (1, regoff_t);
+ regarray->end = XCALLOC (1, regoff_t);
+ regarray->num_regs = 1;
+ }
+
+ /* Register 0 is the empty match at OFFSET; all others are unset. */
+ regarray->start[0] = offset;
+ regarray->end[0] = offset;
+
+ for (i = 1 ; i < regarray->num_regs; ++i)
+ regarray->start[i] = regarray->end[i] = -1;
+ }
+
+ return 1;
+ }
+
+ /* When searching from the start of the buffer, try to reject it with
+ the DFA before falling back to re_search below. */
+ if (buf_start_offset == 0)
+ {
+ struct dfa *superset = dfasuperset (regex->dfa);
+
+ if (superset && !dfaexec (superset, buf, buf + buflen, true, NULL, NULL))
+ return 0;
+
+ if ((!regsize && (regex->flags & REG_NEWLINE))
+ || (!superset && dfaisfast (regex->dfa)))
+ {
+ bool backref = false;
+
+ if (!dfaexec (regex->dfa, buf, buf + buflen, true, NULL, &backref))
+ return 0;
+
+ /* The DFA result is accepted as final only when no registers are
+ wanted, REG_NEWLINE is set and dfaexec did not set BACKREF. */
+ if (!regsize && (regex->flags & REG_NEWLINE) && !backref)
+ return 1;
+ }
+ }
+
+ /* If the buffer delimiter is not newline character, we cannot use
+ newline_anchor flag of regex. So do it line-by-line, and add offset
+ value to results. */
+ if ((regex->flags & REG_NEWLINE) && buffer_delimiter != '\n')
+ {
+ const char *beg, *end;
+ const char *start;
+
+ beg = buf;
+
+ /* Begin at the start of the line that contains BUF_START_OFFSET. */
+ if (buf_start_offset > 0)
+ {
+ const char *eol = memrchr (buf, buffer_delimiter, buf_start_offset);
+
+ if (eol != NULL)
+ beg = eol + 1;
+ }
+
+ start = buf + buf_start_offset;
+
+ for (;;)
+ {
+ end = memchr (beg, buffer_delimiter, buf + buflen - beg);
+
+ if (end == NULL)
+ end = buf + buflen;
+
+ ret = re_search (&regex->pattern, beg, end - beg,
+ start - beg, end - start,
+ regsize ? regarray : NULL);
+
+ if (ret > -1)
+ {
+ size_t i;
+
+ /* Convert line-relative offsets to offsets relative to BUF. */
+ ret += beg - buf;
+
+ if (regsize)
+ {
+ for (i = 0; i < regarray->num_regs; ++i)
+ {
+ if (regarray->start[i] > -1)
+ regarray->start[i] += beg - buf;
+ if (regarray->end[i] > -1)
+ regarray->end[i] += beg - buf;
+ }
+ }
+
+ break;
+ }
+
+ /* No match on the last line of the buffer: give up. */
+ if (end == buf + buflen)
+ break;
+
+ /* Continue with the line after the delimiter. */
+ beg = start = end + 1;
+ }
+ }
+ else
+ ret = re_search (&regex->pattern, buf, buflen, buf_start_offset,
+ buflen - buf_start_offset,
+ regsize ? regarray : NULL);
+
+ return (ret > -1);
+}
+
+
+#ifdef lint
+/* Free REGEX together with its DFA and its compiled pattern.
+   Only built when `lint' is defined.  */
+void
+release_regex (struct regex *regex)
+{
+  struct dfa *d = regex->dfa;
+
+  if (d != NULL)
+    {
+      dfafree (d);
+      free (d);
+      regex->dfa = NULL;
+    }
+
+  regfree (&regex->pattern);
+  free (regex);
+}
+#endif /* lint */
diff --git a/sed/sed.c b/sed/sed.c
new file mode 100644
index 0000000..e588c56
--- /dev/null
+++ b/sed/sed.c
@@ -0,0 +1,388 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+
+#include "sed.h"
+
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "binary-io.h"
+#include "getopt.h"
+#include "progname.h"
+#include "version.h"
+#include "xalloc.h"
+
+#include "version-etc.h"
+
+#define AUTHORS \
+ _("Jay Fenlason"), \
+ _("Tom Lord"), \
+ _("Ken Pizzini"), \
+ _("Paolo Bonzini"), \
+ _("Jim Meyering"), \
+ _("Assaf Gordon")
+
+/* Either 0 or REG_EXTENDED; main () sets REG_EXTENDED for -E / -r. */
+int extended_regexp_flags = 0;
+
+/* one-byte buffer delimiter */
+char buffer_delimiter = '\n';
+
+/* If set, fflush(stdout) on every line output. */
+bool unbuffered = false;
+
+/* If set, don't write out the line unless explicitly told to */
+bool no_default_output = false;
+
+/* If set, reset line counts on every new file. */
+bool separate_files = false;
+
+/* If set, follow symlinks when processing in place */
+bool follow_symlinks = false;
+
+/* If set, operate in 'sandbox' mode */
+bool sandbox = false;
+
+/* if set, print debugging information */
+bool debug = false;
+
+/* How do we edit files in-place? (we don't if NULL) */
+char *in_place_extension = NULL;
+
+/* The mode to use to read/write files, either "r"/"w" or "rb"/"wb". */
+char const *read_mode = "r";
+char const *write_mode = "w";
+
+#if O_BINARY
+/* Additional flag for binary mode on platforms with O_BINARY/O_TEXT. */
+bool binary_mode = false;
+#endif
+
+/* Do we need to be pedantically POSIX compliant? */
+enum posixicity_types posixicity;
+
+/* How long should the `l' command's output line be? */
+countT lcmd_out_line_len = 70;
+
+/* The complete compiled SED program that we are going to run: */
+static struct vector *the_program = NULL;
+
+/* When we've created a temporary for an in-place update,
+ we may have to exit before the rename. This is the name
+ of the temporary that we'll have to unlink via an atexit-
+ registered cleanup function. */
+static char const *G_file_to_unlink;
+
+/* Locale information filled in by init_localeinfo () in main (). */
+struct localeinfo localeinfo;
+
+/* Exit handler, registered with atexit () in main ().  If the process
+   exits between the creation of a temporary file and the rename that
+   completes a sed -i edit, remove that temporary file.  */
+static void
+cleanup (void)
+{
+  IF_LINT (free (in_place_extension));
+
+  if (G_file_to_unlink != NULL)
+    unlink (G_file_to_unlink);
+}
+
+/* Note that FILE must be removed upon exit. Only the pointer is
+ stored (no copy is made), and it replaces any previously registered
+ file name. */
+void
+register_cleanup_file (char const *file)
+{
+ G_file_to_unlink = file;
+}
+
+/* Forget the file registered with register_cleanup_file (), so that
+ cleanup () no longer unlinks it. */
+void
+cancel_cleanup (void)
+{
+ G_file_to_unlink = NULL;
+}
+
+static void usage (int);
+/* Print the project home page and general help pointers.  The text goes
+   to stderr when ERRMSG is non-zero and to stdout otherwise; the bug
+   report address is added only when ERRMSG is zero.  */
+static void
+contact (int errmsg)
+{
+  FILE *out;
+
+  if (errmsg)
+    out = stderr;
+  else
+    out = stdout;
+
+  fprintf (out, _("GNU sed home page: <https://www.gnu.org/software/sed/>.\n\
+General help using GNU software: <https://www.gnu.org/gethelp/>.\n"));
+
+  /* Only print the bug report address for `sed --help', otherwise we'll
+     get reports for other people's bugs.  */
+  if (errmsg)
+    return;
+
+  fprintf (out, _("E-mail bug reports to: <%s>.\n"), PACKAGE_BUGREPORT);
+}
+
+/* Print the command-line help and exit with STATUS. The text goes to
+ stderr when STATUS is non-zero and to stdout otherwise. Some option
+ descriptions are printed only when ENABLE_FOLLOW_SYMLINKS is defined
+ or O_BINARY is non-zero. */
+_Noreturn static void
+usage (int status)
+{
+ FILE *out = status ? stderr : stdout;
+
+ fprintf (out, _("\
+Usage: %s [OPTION]... {script-only-if-no-other-script} [input-file]...\n\
+\n"), program_name);
+
+ fprintf (out, _(" -n, --quiet, --silent\n\
+ suppress automatic printing of pattern space\n"));
+ fprintf (out, _(" --debug\n\
+ annotate program execution\n"));
+ fprintf (out, _(" -e script, --expression=script\n\
+ add the script to the commands to be executed\n"));
+ fprintf (out, _(" -f script-file, --file=script-file\n\
+ add the contents of script-file to the commands" \
+ " to be executed\n"));
+#ifdef ENABLE_FOLLOW_SYMLINKS
+ fprintf (out, _(" --follow-symlinks\n\
+ follow symlinks when processing in place\n"));
+#endif
+ fprintf (out, _(" -i[SUFFIX], --in-place[=SUFFIX]\n\
+ edit files in place (makes backup if SUFFIX supplied)\n"));
+#if O_BINARY
+ fprintf (out, _(" -b, --binary\n\
+ open files in binary mode (CR+LFs are not" \
+ " processed specially)\n"));
+#endif
+ fprintf (out, _(" -l N, --line-length=N\n\
+ specify the desired line-wrap length for the `l' command\n"));
+ fprintf (out, _(" --posix\n\
+ disable all GNU extensions.\n"));
+ fprintf (out, _(" -E, -r, --regexp-extended\n\
+ use extended regular expressions in the script\n\
+ (for portability use POSIX -E).\n"));
+ fprintf (out, _(" -s, --separate\n\
+ consider files as separate rather than as a single,\n\
+ continuous long stream.\n"));
+ fprintf (out, _(" --sandbox\n\
+ operate in sandbox mode (disable e/r/w commands).\n"));
+ fprintf (out, _(" -u, --unbuffered\n\
+ load minimal amounts of data from the input files and flush\n\
+ the output buffers more often\n"));
+ fprintf (out, _(" -z, --null-data\n\
+ separate lines by NUL characters\n"));
+ fprintf (out, _(" --help display this help and exit\n"));
+ fprintf (out, _(" --version output version information and exit\n"));
+ fprintf (out, _("\n\
+If no -e, --expression, -f, or --file option is given, then the first\n\
+non-option argument is taken as the sed script to interpret. All\n\
+remaining arguments are names of input files; if no input files are\n\
+specified, then the standard input is read.\n\
+\n"));
+ contact (status);
+
+ /* NOTE(review): presumably ck_fclose (NULL) closes every stream that
+ was opened through the ck_* helpers - confirm in utils.c. */
+ ck_fclose (NULL);
+ exit (status);
+}
+
+/* Program entry point: set up the locale, parse the command line,
+ compile the script, run it over the input files, and return the
+ status reported by process_files (). */
+int
+main (int argc, char **argv)
+{
+ /* Note: 'V' is accepted here (with an argument) but has no case in the
+ switch below, so it ends up in the default branch (usage error). */
+#define SHORTOPTS "bsnrzuEe:f:l:i::V:"
+
+ /* Codes for long-only options, chosen above the range of char. */
+ enum { SANDBOX_OPTION = CHAR_MAX+1,
+ DEBUG_OPTION
+ };
+
+ /* The codes 'p', 'v', 'h' and 'F' do not appear in SHORTOPTS; they are
+ only reachable through their long option names. */
+ static const struct option longopts[] = {
+ {"binary", 0, NULL, 'b'},
+ {"regexp-extended", 0, NULL, 'r'},
+ {"debug", 0, NULL, DEBUG_OPTION},
+ {"expression", 1, NULL, 'e'},
+ {"file", 1, NULL, 'f'},
+ {"in-place", 2, NULL, 'i'},
+ {"line-length", 1, NULL, 'l'},
+ {"null-data", 0, NULL, 'z'},
+ {"zero-terminated", 0, NULL, 'z'},
+ {"quiet", 0, NULL, 'n'},
+ {"posix", 0, NULL, 'p'},
+ {"silent", 0, NULL, 'n'},
+ {"sandbox", 0, NULL, SANDBOX_OPTION},
+ {"separate", 0, NULL, 's'},
+ {"unbuffered", 0, NULL, 'u'},
+ {"version", 0, NULL, 'v'},
+ {"help", 0, NULL, 'h'},
+#ifdef ENABLE_FOLLOW_SYMLINKS
+ {"follow-symlinks", 0, NULL, 'F'},
+#endif
+ {NULL, 0, NULL, 0}
+ };
+
+ int opt;
+ int return_code;
+ const char *cols = getenv ("COLS");
+
+ set_program_name (argv[0]);
+ initialize_main (&argc, &argv);
+#if HAVE_SETLOCALE
+ /* Set locale according to user's wishes. */
+ setlocale (LC_ALL, "");
+#endif
+ initialize_mbcs ();
+ init_localeinfo (&localeinfo);
+
+ /* Arrange to remove any un-renamed temporary file,
+ upon premature exit. */
+ atexit (cleanup);
+
+#if ENABLE_NLS
+
+ /* Tell program which translations to use and where to find. */
+ bindtextdomain (PACKAGE, LOCALEDIR);
+ textdomain (PACKAGE);
+#endif
+
+ /* Default strictness; --posix may override it below. */
+ if (getenv ("POSIXLY_CORRECT") != NULL)
+ posixicity = POSIXLY_CORRECT;
+ else
+ posixicity = POSIXLY_EXTENDED;
+
+ /* If environment variable `COLS' is set, use its value for
+ the baseline setting of `lcmd_out_line_len'. The "-1"
+ is to avoid gratuitous auto-line-wrap on ttys.
+ */
+ if (cols)
+ {
+ countT t = atoi (cols);
+ if (t > 1)
+ lcmd_out_line_len = t-1;
+ }
+
+ /* Options are handled in command-line order; each -e / -f script is
+ compiled as soon as it is seen. */
+ while ((opt = getopt_long (argc, argv, SHORTOPTS, longopts, NULL)) != EOF)
+ {
+ switch (opt)
+ {
+ case 'n':
+ no_default_output = true;
+ break;
+ case 'e':
+ the_program = compile_string (the_program, optarg, strlen (optarg));
+ break;
+ case 'f':
+ the_program = compile_file (the_program, optarg);
+ break;
+
+ case 'z':
+ buffer_delimiter = 0;
+ break;
+
+ case 'F':
+ follow_symlinks = true;
+ break;
+
+ case 'i':
+ /* In-place editing also turns on separate_files. The stored
+ extension always contains a '*': either "*" alone (no backup),
+ the argument itself when it already has a '*', or '*' followed
+ by the argument. */
+ separate_files = true;
+ IF_LINT (free (in_place_extension));
+ if (optarg == NULL)
+ /* use no backups */
+ in_place_extension = xstrdup ("*");
+
+ else if (strchr (optarg, '*') != NULL)
+ in_place_extension = xstrdup (optarg);
+
+ else
+ {
+ in_place_extension = XCALLOC (strlen (optarg) + 2, char);
+ in_place_extension[0] = '*';
+ strcpy (in_place_extension + 1, optarg);
+ }
+
+ break;
+
+ case 'l':
+ /* NOTE(review): atoi () gives no error indication, so a
+ non-numeric argument is silently read as 0. */
+ lcmd_out_line_len = atoi (optarg);
+ break;
+
+ case 'p':
+ posixicity = POSIXLY_BASIC;
+ break;
+
+ case 'b':
+ read_mode = "rb";
+ write_mode = "wb";
+#if O_BINARY
+ binary_mode = true;
+#endif
+ break;
+
+ case 'E':
+ case 'r':
+ extended_regexp_flags = REG_EXTENDED;
+ break;
+
+ case 's':
+ separate_files = true;
+ break;
+
+ case SANDBOX_OPTION:
+ sandbox = true;
+ break;
+
+ case DEBUG_OPTION:
+ debug = true;
+ break;
+
+ case 'u':
+ unbuffered = true;
+ break;
+
+ case 'v':
+ version_etc (stdout, program_name, PACKAGE_NAME, Version,
+ AUTHORS, (char *) NULL);
+ contact (false);
+ ck_fclose (NULL);
+ exit (EXIT_SUCCESS);
+ case 'h':
+ usage (EXIT_SUCCESS);
+ default:
+ usage (EXIT_BAD_USAGE);
+ }
+ }
+
+ /* No script came from -e / -f: the first non-option argument is the
+ script, and it is an error if there is none. */
+ if (!the_program)
+ {
+ if (optind < argc)
+ {
+ char *arg = argv[optind++];
+ the_program = compile_string (the_program, arg, strlen (arg));
+ }
+ else
+ usage (EXIT_BAD_USAGE);
+ }
+ check_final_program (the_program);
+
+#if O_BINARY
+ if (binary_mode)
+ {
+ if (set_binary_mode ( fileno (stdin), O_BINARY) == -1)
+ panic (_("failed to set binary mode on STDIN"));
+ if (set_binary_mode ( fileno (stdout), O_BINARY) == -1)
+ panic (_("failed to set binary mode on STDOUT"));
+ }
+#endif
+
+ if (debug)
+ debug_print_program (the_program);
+
+ /* The remaining arguments are the input files. */
+ return_code = process_files (the_program, argv+optind);
+
+ finish_program (the_program);
+ ck_fclose (NULL);
+
+ return return_code;
+}
diff --git a/sed/sed.h b/sed/sed.h
new file mode 100644
index 0000000..cb28736
--- /dev/null
+++ b/sed/sed.h
@@ -0,0 +1,293 @@
+/* GNU SED, a batch stream editor.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "basicdefs.h"
+#include "dfa.h"
+#include "localeinfo.h"
+#include "regex.h"
+#include <stdio.h>
+#include "unlocked-io.h"
+
+#include "utils.h"
+
+/* Struct vector is used to describe a compiled sed program. */
+struct vector {
+ struct sed_cmd *v; /* a dynamically allocated array */
+ size_t v_allocated; /* ... number of slots allocated */
+ size_t v_length; /* ... number of slots in use */
+};
+
+/* This structure tracks files used by sed so that they may all be
+ closed cleanly at normal program termination. A flag is kept that tells
+ if a missing newline was encountered, so that it is added on the
+ next line and the two lines are not concatenated. */
+struct output {
+ char *name;
+ bool missing_newline;
+ FILE *fp;
+ struct output *link;
+};
+
+struct text_buf {
+ char *text;
+ size_t text_length;
+};
+
+/* A compiled regular expression together with its source text.
+ Allocated by compile_regex () with extra room after the struct so
+ that `re' can hold the whole pattern. */
+struct regex {
+ regex_t pattern; /* pattern buffer filled by re_compile_pattern */
+ int flags; /* flags given to compile_regex (REG_ICASE, REG_NEWLINE, ...) */
+ size_t sz; /* length of `re' after normalize_text */
+ struct dfa *dfa; /* DFA compiled from the same pattern text */
+ bool begline; /* set when the pattern is exactly "^" */
+ bool endline; /* set when the pattern is exactly "$" */
+ char re[1]; /* pattern text; really `sz' bytes (see above) */
+};
+
+enum replacement_types {
+ REPL_ASIS = 0,
+ REPL_UPPERCASE = 1,
+ REPL_LOWERCASE = 2,
+ REPL_UPPERCASE_FIRST = 4,
+ REPL_LOWERCASE_FIRST = 8,
+ REPL_MODIFIERS = REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST,
+
+ /* These are given to aid in debugging */
+ REPL_UPPERCASE_UPPERCASE = REPL_UPPERCASE_FIRST | REPL_UPPERCASE,
+ REPL_UPPERCASE_LOWERCASE = REPL_UPPERCASE_FIRST | REPL_LOWERCASE,
+ REPL_LOWERCASE_UPPERCASE = REPL_LOWERCASE_FIRST | REPL_UPPERCASE,
+ REPL_LOWERCASE_LOWERCASE = REPL_LOWERCASE_FIRST | REPL_LOWERCASE
+};
+
+enum text_types {
+ TEXT_BUFFER,
+ TEXT_REPLACEMENT,
+ TEXT_REGEX
+};
+
+enum posixicity_types {
+ POSIXLY_EXTENDED, /* with GNU extensions */
+ POSIXLY_CORRECT, /* with POSIX-compatible GNU extensions */
+ POSIXLY_BASIC /* pedantically POSIX */
+};
+
+enum addr_state {
+ RANGE_INACTIVE, /* never been active */
+ RANGE_ACTIVE, /* between first and second address */
+ RANGE_CLOSED /* like RANGE_INACTIVE, but range has ended once */
+};
+
+enum addr_types {
+ ADDR_IS_NULL, /* null address */
+ ADDR_IS_REGEX, /* a.addr_regex is valid */
+ ADDR_IS_NUM, /* a.addr_number is valid */
+ ADDR_IS_NUM_MOD, /* a.addr_number is valid, addr_step is modulo */
+ ADDR_IS_STEP, /* address is +N (only valid for addr2) */
+ ADDR_IS_STEP_MOD, /* address is ~N (only valid for addr2) */
+ ADDR_IS_LAST /* address is $ */
+};
+
+struct addr {
+ enum addr_types addr_type;
+ countT addr_number;
+ countT addr_step;
+ struct regex *addr_regex;
+};
+
+
+struct replacement {
+ char *prefix;
+ size_t prefix_length;
+ int subst_id;
+ enum replacement_types repl_type;
+ struct replacement *next;
+};
+
+struct subst {
+ struct regex *regx;
+ struct replacement *replacement;
+ countT numb; /* if >0, only substitute for match number "numb" */
+ struct output *outf; /* 'w' option given */
+ unsigned global : 1; /* 'g' option given */
+ unsigned print : 2; /* 'p' option given (before/after eval) */
+ unsigned eval : 1; /* 'e' option given */
+ unsigned max_id : 4; /* maximum backreference on the RHS */
+#ifdef lint
+ char* replacement_buffer;
+#endif
+};
+
+
+
+
+struct sed_cmd {
+ struct addr *a1; /* save space: usually is NULL */
+ struct addr *a2;
+
+ /* See the description of enum addr_state, above. */
+ enum addr_state range_state;
+
+ /* Non-zero if command is to be applied to non-matches. */
+ char addr_bang;
+
+ /* The actual command character. */
+ char cmd;
+
+ /* auxiliary data for various commands */
+ union {
+ /* This structure is used for a, i, and c commands. */
+ struct text_buf cmd_txt;
+
+ /* This is used for the l, q and Q commands. */
+ int int_arg;
+
+ /* This is used for the {}, b, and t commands. */
+ countT jump_index;
+
+ /* This is used for the r command. */
+ char *fname;
+
+ /* This is used for the hairy s command. */
+ struct subst *cmd_subst;
+
+ /* This is used for the w command. */
+ struct output *outf;
+
+ /* This is used for the R command.
+ (despite the struct name, it is used for both in and out files). */
+ struct output *inf;
+
+ /* This is used for the y command. */
+ unsigned char *translate;
+ char **translatemb;
+
+ /* This is used for the ':' command (debug only). */
+ char* label_name;
+ } x;
+};
+
+
+_Noreturn void bad_prog (const char *why);
+size_t normalize_text (char *text, size_t len, enum text_types buftype);
+struct vector *compile_string (struct vector *, char *str, size_t len);
+struct vector *compile_file (struct vector *, const char *cmdfile);
+void check_final_program (struct vector *);
+void rewind_read_files (void);
+void finish_program (struct vector *);
+
+struct regex *compile_regex (struct buffer *b, int flags, int needed_sub);
+int match_regex (struct regex *regex,
+ char *buf, size_t buflen, size_t buf_start_offset,
+ struct re_registers *regarray, int regsize);
+#ifdef lint
+void release_regex (struct regex *);
+#endif
+
+void
+debug_print_command (const struct vector *program, const struct sed_cmd *sc);
+void
+debug_print_program (const struct vector *program);
+void
+debug_print_char (char c);
+
+int process_files (struct vector *, char **argv);
+
+int main (int, char **);
+
+extern struct localeinfo localeinfo;
+
+extern int extended_regexp_flags;
+
+/* one-byte buffer delimiter */
+extern char buffer_delimiter;
+
+/* If set, fflush(stdout) on every line output,
+ and turn off stream buffering on inputs. */
+extern bool unbuffered;
+
+/* If set, don't write out the line unless explicitly told to. */
+extern bool no_default_output;
+
+/* If set, reset line counts on every new file. */
+extern bool separate_files;
+
+/* If set, follow symlinks when invoked with -i option */
+extern bool follow_symlinks;
+
+/* Do we need to be pedantically POSIX compliant? */
+extern enum posixicity_types posixicity;
+
+/* How long should the `l' command's output line be? */
+extern countT lcmd_out_line_len;
+
+/* How do we edit files in-place? (we don't if NULL) */
+extern char *in_place_extension;
+
+/* The mode to use to read and write files, either "r"/"w" or "rb"/"wb". */
+extern char const *read_mode;
+extern char const *write_mode;
+
+/* Should we use EREs? */
+extern bool use_extended_syntax_p;
+
+/* Declarations for multibyte character sets. */
+extern int mb_cur_max;
+extern bool is_utf8;
+
+/* If set, operate in 'sandbox' mode - disable e/r/w commands */
+extern bool sandbox;
+
+/* If set, print debugging information. */
+extern bool debug;
+
+/* Wrappers around the multibyte conversion functions. Each one takes a
+ shortcut when mb_cur_max is 1 and calls the library function (or
+ is_mb_char) only otherwise. */
+
+/* Like mbrtowc; when mb_cur_max is 1, *PWC is set from the single byte
+ at S via btowc and the result is 1. */
+#define MBRTOWC(pwc, s, n, ps) \
+ (mb_cur_max == 1 ? \
+ (*(pwc) = btowc (*(unsigned char *) (s)), 1) : \
+ mbrtowc ((pwc), (s), (n), (ps)))
+
+/* Like wcrtomb; when mb_cur_max is 1, *S is set via wctob and the
+ result is 1. */
+#define WCRTOMB(s, wc, ps) \
+ (mb_cur_max == 1 ? \
+ (*(s) = wctob ((wint_t) (wc)), 1) : \
+ wcrtomb ((s), (wc), (ps)))
+
+/* Like mbsinit; always 1 when mb_cur_max is 1. */
+#define MBSINIT(s) \
+ (mb_cur_max == 1 ? 1 : mbsinit ((s)))
+
+/* Length of the next character, computed with mbrtowc (NULL, ...);
+ always 1 when mb_cur_max is 1. */
+#define MBRLEN(s, n, ps) \
+ (mb_cur_max == 1 ? 1 : mbrtowc (NULL, s, n, ps))
+
+/* is_mb_char (CH, PS), except that the result is always 0 when
+ mb_cur_max is 1. */
+#define IS_MB_CHAR(ch, ps) \
+ (mb_cur_max == 1 ? 0 : is_mb_char (ch, ps))
+
+extern int is_mb_char (int ch, mbstate_t *ps);
+extern void initialize_mbcs (void);
+extern void register_cleanup_file (char const *file);
+extern void cancel_cleanup (void);
+
+/* Use this to suppress gcc's '...may be used before initialized' warnings. */
+#ifdef lint
+# define IF_LINT(Code) Code
+#else
+# define IF_LINT(Code) /* empty */
+#endif
+
+#ifndef FALLTHROUGH
+# if __GNUC__ < 7
+# define FALLTHROUGH ((void) 0)
+# else
+# define FALLTHROUGH __attribute__ ((__fallthrough__))
+# endif
+#endif
diff --git a/sed/utils.c b/sed/utils.c
new file mode 100644
index 0000000..6f847cd
--- /dev/null
+++ b/sed/utils.c
@@ -0,0 +1,499 @@
+/* Functions from hack's utils library.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <limits.h>
+
+#include "binary-io.h"
+#include "unlocked-io.h"
+#include "utils.h"
+#include "progname.h"
+#include "fwriting.h"
+#include "xalloc.h"
+
+#if O_BINARY
+extern bool binary_mode;
+#endif
+
+/* Store information about files opened with ck_fopen
+ so that error messages from ck_fread, ck_fwrite, etc. can print the
+ name of the file that had the error.  Nodes form a singly linked list
+ headed by `open_files'; they are created by register_open_file and
+ released by ck_fclose.  */
+
+struct open_file
+ {
+ /* The stdio stream this node describes.  */
+ FILE *fp;
+ /* Heap-allocated copy of the file name (xstrdup in register_open_file).  */
+ char *name;
+ /* Next node in the list, or NULL at the end.  */
+ struct open_file *link;
+ /* When set, panic closes the stream and unlinks NAME before exiting.
+    register_open_file always clears it.  */
+ unsigned temp : 1;
+ };
+
+static struct open_file *open_files = NULL;
+static void do_ck_fclose (FILE *fp);
+
+/* Print "PROGRAM_NAME: MESSAGE" followed by a newline on stderr, where
+   MESSAGE is the printf-style format STR expanded with the remaining
+   arguments.  Then walk the list of registered files, closing and
+   removing each one flagged as temporary, and exit with status
+   EXIT_PANIC.  This function never returns.  */
+
+void
+panic (const char *str, ...)
+{
+  va_list ap;
+
+  fprintf (stderr, "%s: ", program_name);
+  va_start (ap, str);
+  vfprintf (stderr, str, ap);
+  va_end (ap);
+  putc ('\n', stderr);
+
+  /* Unlink the temporary files.  */
+  while (open_files)
+    {
+      if (open_files->temp)
+        {
+          fclose (open_files->fp);
+
+          /* Decide failure from unlink's return value rather than from a
+             side effect on errno, and end the diagnostic with a newline
+             just like the main message above.  */
+          if (unlink (open_files->name) != 0)
+            {
+              fprintf (stderr, _("cannot remove %s: %s"), open_files->name,
+                       strerror (errno));
+              putc ('\n', stderr);
+            }
+        }
+
+      /* The process is about to exit, so the nodes are only freed in
+         lint builds (to keep leak checkers quiet).  */
+#ifdef lint
+      struct open_file *next = open_files->link;
+      free (open_files->name);
+      free (open_files);
+      open_files = next;
+#else
+      open_files = open_files->link;
+#endif
+    }
+
+  exit (EXIT_PANIC);
+}
+
+/* Return the name under which FP was registered in open_files.  For an
+   unregistered stream fall back to "stdin", "stdout" or "stderr" when FP
+   is one of the standard streams, and to "<unknown>" otherwise.  */
+static const char * _GL_ATTRIBUTE_PURE
+utils_fp_name (FILE *fp)
+{
+  struct open_file *node = open_files;
+
+  while (node)
+    {
+      if (node->fp == fp)
+        return node->name;
+      node = node->link;
+    }
+
+  if (fp == stdin)
+    return "stdin";
+  if (fp == stdout)
+    return "stdout";
+  if (fp == stderr)
+    return "stderr";
+  return "<unknown>";
+}
+
+/* Record that stream FP goes by NAME.  If FP already has a node in
+   open_files that node is reused (its old name is freed); otherwise a new
+   node is pushed onto the front of the list.  NAME is copied, and the
+   node's `temp' flag is cleared.  */
+static void
+register_open_file (FILE *fp, const char *name)
+{
+  struct open_file *entry = open_files;
+
+  /* Search for an existing node describing FP.  */
+  while (entry && entry->fp != fp)
+    entry = entry->link;
+
+  if (entry)
+    free (entry->name);
+  else
+    {
+      entry = XCALLOC (1, struct open_file);
+      entry->link = open_files;
+      open_files = entry;
+    }
+
+  entry->name = xstrdup (name);
+  entry->fp = fp;
+  entry->temp = false;
+}
+
+/* Open NAME with fopen using MODE and register the stream under NAME.
+   When the open fails, panic if FAIL is nonzero; otherwise return
+   NULL.  */
+FILE *
+ck_fopen (const char *name, const char *mode, int fail)
+{
+  FILE *fp = fopen (name, mode);
+
+  if (fp)
+    {
+      register_open_file (fp, name);
+      return fp;
+    }
+
+  if (fail)
+    panic (_("couldn't open file %s: %s"), name, strerror (errno));
+
+  return NULL;
+}
+
+/* Attach a stdio stream to descriptor FD with fdopen using MODE and
+   register it under NAME.  When fdopen fails, panic if FAIL is nonzero;
+   otherwise return NULL.  */
+FILE *
+ck_fdopen (int fd, const char *name, const char *mode, int fail)
+{
+  FILE *fp = fdopen (fd, mode);
+
+  if (fp)
+    {
+      register_open_file (fp, name);
+      return fp;
+    }
+
+  if (fail)
+    panic (_("couldn't attach to %s: %s"), name, strerror (errno));
+
+  return NULL;
+}
+
+/* Create a unique temporary file named TMPDIR/BASE followed by six
+   generated characters and return a stdio stream for it opened with
+   MODE.  The freshly allocated file name is stored in *P_FILENAME and
+   the stream is registered under that name.  Panics if the file cannot
+   be created, cannot be switched to binary mode when that is requested,
+   or cannot be wrapped in a stream.  */
+FILE *
+ck_mkstemp (char **p_filename, const char *tmpdir,
+            const char *base, const char *mode)
+{
+  /* 8 == '/' + "XXXXXX" + terminating NUL.  */
+  char *template = xmalloc (strlen (tmpdir) + strlen (base) + 8);
+  sprintf (template, "%s/%sXXXXXX", tmpdir, base);
+
+  /* The ownership might change, so omit some permissions at first
+     so unauthorized users cannot nip in before the file is ready.
+     The mask must drop the group/other bits (0077); masking the owner
+     bits instead would create a file its own creator cannot access.
+     mkstemp forces O_BINARY on cygwin, so use mkostemp instead.  */
+  mode_t save_umask = umask (0077);
+  int fd = mkostemp (template, 0);
+  umask (save_umask);
+  if (fd == -1)
+    panic (_("couldn't open temporary file %s: %s"), template,
+           strerror (errno));
+#if O_BINARY
+  if (binary_mode && (set_binary_mode ( fd, O_BINARY) == -1))
+    panic (_("failed to set binary mode on '%s'"), template);
+#endif
+
+  *p_filename = template;
+  FILE *fp = fdopen (fd, mode);
+  /* Never register or hand back a NULL stream.  */
+  if (!fp)
+    panic (_("couldn't attach to %s: %s"), template, strerror (errno));
+  register_open_file (fp, template);
+  return fp;
+}
+
+/* Write NMEMB items of SIZE bytes each from PTR to STREAM, after
+   clearing the stream's error indicator.  Nothing is written when SIZE
+   is zero.  Panics, naming the stream and the requested item count, if
+   fwrite reports fewer than NMEMB items written.  */
+void
+ck_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+  clearerr (stream);
+
+  if (size == 0)
+    return;
+
+  if (fwrite (ptr, size, nmemb, stream) == nmemb)
+    return;
+
+  panic (ngettext ("couldn't write %llu item to %s: %s",
+                   "couldn't write %llu items to %s: %s", nmemb),
+         (unsigned long long) nmemb, utils_fp_name (stream),
+         strerror (errno));
+}
+
+/* Read up to NMEMB items of SIZE bytes each from STREAM into PTR, after
+   clearing the stream's error indicator, and return the number of items
+   read.  When SIZE is zero nothing is read and NMEMB is returned
+   unchanged.  Panics if no item was read and the stream's error
+   indicator is set.  */
+size_t
+ck_fread (void *ptr, size_t size, size_t nmemb, FILE *stream)
+{
+  clearerr (stream);
+
+  if (size != 0)
+    {
+      nmemb = fread (ptr, size, nmemb, stream);
+      if (nmemb == 0 && ferror (stream))
+        panic (_("read error on %s: %s"), utils_fp_name (stream),
+               strerror (errno));
+    }
+
+  return nmemb;
+}
+
+/* Read from STREAM up to and including BUFFER_DELIMITER with getdelim,
+   storing into *TEXT / *BUFLEN, and return getdelim's result converted
+   to size_t (so a -1 result comes back as (size_t) -1).  Panics if the
+   stream's error indicator is set, either already on entry (in which
+   case nothing is read) or after the read.  */
+size_t
+ck_getdelim (char **text, size_t *buflen, char buffer_delimiter, FILE *stream)
+{
+  ssize_t result;
+
+  if (ferror (stream))
+    panic (_("read error on %s: %s"), utils_fp_name (stream), strerror (errno));
+
+  result = getdelim (text, buflen, buffer_delimiter, stream);
+
+  if (ferror (stream))
+    panic (_("read error on %s: %s"), utils_fp_name (stream), strerror (errno));
+
+  return result;
+}
+
+/* Flush STREAM if fwriting reports it as being written to; do nothing
+   otherwise.  The error indicator is cleared first.  Panics when fflush
+   fails, except that a failure with errno == EBADF is ignored.  */
+void
+ck_fflush (FILE *stream)
+{
+  if (fwriting (stream))
+    {
+      clearerr (stream);
+      if (fflush (stream) == EOF && errno != EBADF)
+        panic ("couldn't flush %s: %s", utils_fp_name (stream),
+               strerror (errno));
+    }
+}
+
+/* Close STREAM and drop its node from open_files, panicking if the
+   flush or close fails.  A NULL STREAM means: close every registered
+   file and then stdout as well.  */
+void
+ck_fclose (FILE *stream)
+{
+  /* LINKP always addresses the pointer that leads to the node under
+     inspection (first open_files itself, later some node's `link').
+     Unlinking through it keeps open_files valid at every step, which
+     matters because do_ck_fclose may panic and panic walks that list;
+     it must never reach a node that was already freed here.  */
+  struct open_file **linkp = &open_files;
+  struct open_file *cur;
+
+  /* a NULL stream means to close all files */
+  while ((cur = *linkp) != NULL)
+    {
+      if (!stream || stream == cur->fp)
+        {
+          do_ck_fclose (cur->fp);
+          *linkp = cur->link;
+          free (cur->name);
+          free (cur);
+        }
+      else
+        linkp = &cur->link;
+    }
+
+  /* Also care about stdout, because if it is redirected the
+     last output operations might fail and it is important
+     to signal this as an error (perhaps to make).  */
+  if (!stream)
+    do_ck_fclose (stdout);
+}
+
+/* Flush FP (via ck_fflush), clear its error indicator and close it.
+   Panics, naming the stream, when fclose fails.  */
+void
+do_ck_fclose (FILE *fp)
+{
+  ck_fflush (fp);
+  clearerr (fp);
+
+  if (fclose (fp) != EOF)
+    return;
+
+  panic ("couldn't close %s: %s", utils_fp_name (fp), strerror (errno));
+}
+
+/* Follow symlink and panic if something fails.  Return the ultimate
+   symlink target, stored in a temporary buffer that the caller should
+   not free.  If FNAME is not a symlink, FNAME itself is returned.  When
+   ENABLE_FOLLOW_SYMLINKS is not defined this simply returns FNAME.
+
+   Two static buffers are used: BUF2 receives each readlink result and
+   BUF1 holds the path being resolved whenever it has to be rebuilt.
+   The buffers persist (and only grow) across calls, so a later call
+   overwrites the result of an earlier one.  */
+const char *
+follow_symlink (const char *fname)
+{
+#ifdef ENABLE_FOLLOW_SYMLINKS
+  static char *buf1, *buf2;
+  static int buf_size;
+
+  struct stat statbuf;
+  const char *buf = fname, *c;
+  int rc;
+
+  if (buf_size == 0)
+    {
+      buf1 = xzalloc (PATH_MAX + 1);
+      buf2 = xzalloc (PATH_MAX + 1);
+      buf_size = PATH_MAX + 1;
+    }
+
+  while ((rc = lstat (buf, &statbuf)) == 0
+         && S_ISLNK (statbuf.st_mode))
+    {
+      /* readlink is about to overwrite buf2, so move the current path
+         out of the way first.  */
+      if (buf == buf2)
+        {
+          strcpy (buf1, buf2);
+          buf = buf1;
+        }
+
+      /* A result that fills the whole buffer may have been truncated:
+         enlarge and retry.  */
+      while ((rc = readlink (buf, buf2, buf_size)) == buf_size)
+        {
+          /* xrealloc may move buf1; re-point BUF if it aliases it so
+             that it never refers to released memory.  */
+          bool in_buf1 = (buf == buf1);
+
+          buf_size *= 2;
+          buf1 = xrealloc (buf1, buf_size);
+          buf2 = xrealloc (buf2, buf_size);
+          if (in_buf1)
+            buf = buf1;
+        }
+      if (rc < 0)
+        panic (_("couldn't follow symlink %s: %s"), buf, strerror (errno));
+      else
+        buf2 [rc] = '\0';
+
+      if (buf2[0] != '/' && (c = strrchr (buf, '/')) != NULL)
+        {
+          /* Need to handle relative paths with care.  Reallocate buf1 and
+             buf2 to be big enough.  LEN covers the directory part of BUF
+             including the trailing slash.  */
+          int len = c - buf + 1;
+          if (len + rc + 1 > buf_size)
+            {
+              /* Same aliasing precaution as above.  */
+              bool in_buf1 = (buf == buf1);
+
+              buf_size = len + rc + 1;
+              buf1 = xrealloc (buf1, buf_size);
+              buf2 = xrealloc (buf2, buf_size);
+              if (in_buf1)
+                buf = buf1;
+            }
+
+          /* Always store the new path in buf1.  */
+          if (buf != buf1)
+            memcpy (buf1, buf, len);
+
+          /* Tack the relative symlink at the end of buf1.  */
+          memcpy (buf1 + len, buf2, rc + 1);
+          buf = buf1;
+        }
+      else
+        {
+          /* Use buf2 as the buffer, it saves a strcpy if it is not pointing to
+             another link.  It works for absolute symlinks, and as long as
+             symlinks do not leave the current directory.  */
+          buf = buf2;
+        }
+    }
+
+  if (rc < 0)
+    panic (_("cannot stat %s: %s"), buf, strerror (errno));
+
+  return buf;
+#else
+  return fname;
+#endif /* ENABLE_FOLLOW_SYMLINKS */
+}
+
+/* Rename FROM to TO and return on success.  On failure, first remove
+   UNLINK_IF_FAIL when it is non-NULL, then panic: about the removal if
+   that failed too, otherwise about the rename (reporting rename's own
+   errno).  */
+void
+ck_rename (const char *from, const char *to, const char *unlink_if_fail)
+{
+  if (rename (from, to) != -1)
+    return;
+
+  /* Keep rename's error code; unlink below may overwrite errno.  */
+  int rename_errno = errno;
+
+  /* Failure to remove the temporary file is more severe,
+     so trigger it first.  Failure is judged by unlink's return value
+     rather than by a side effect on errno.  */
+  if (unlink_if_fail && unlink (unlink_if_fail) != 0)
+    panic (_("cannot remove %s: %s"), unlink_if_fail, strerror (errno));
+
+  panic (_("cannot rename %s: %s"), from, strerror (rename_errno));
+}
+
+
+
+
+/* Implement a variable sized buffer of `stuff'. We don't know what it is,
+nor do we care, as long as it doesn't mind being aligned by malloc.
+Buffers are created by init_buffer, appended to with add_buffer and
+add1_buffer, and released with free_buffer.  */
+
+struct buffer
+ {
+ /* Number of bytes currently allocated for B.  */
+ size_t allocated;
+ /* Number of bytes of B that hold data; new data is appended here.  */
+ size_t length;
+ /* The heap storage itself.  */
+ char *b;
+ };
+
+/* Size in bytes of the storage that init_buffer allocates.  */
+#define MIN_ALLOCATE 50
+
+/* Allocate and return a new, empty buffer whose storage has room for
+   MIN_ALLOCATE bytes.  */
+struct buffer *
+init_buffer (void)
+{
+  struct buffer *buf = XCALLOC (1, struct buffer);
+
+  buf->b = XCALLOC (MIN_ALLOCATE, char);
+  buf->length = 0;
+  buf->allocated = MIN_ALLOCATE;
+
+  return buf;
+}
+
+/* Return a pointer to the start of B's storage.  */
+char *
+get_buffer (struct buffer const *b)
+{
+  char *start = b->b;
+
+  return start;
+}
+
+/* Return the number of bytes of data currently held in B.  */
+size_t
+size_buffer (struct buffer const *b)
+{
+  size_t used = b->length;
+
+  return used;
+}
+
+/* Make sure B's storage can hold at least NEWLEN bytes.  Nothing happens
+   if it already can.  Otherwise try to double the allocation when that
+   is strictly more than NEWLEN; if doubling is not attempted or fails,
+   fall back to allocating exactly NEWLEN bytes through REALLOC.  */
+static void
+resize_buffer (struct buffer *b, size_t newlen)
+{
+  size_t capacity = b->allocated;
+  char *grown = NULL;
+
+  if (capacity >= newlen)
+    return;
+
+  capacity *= 2;
+  if (capacity > newlen)
+    grown = realloc (b->b, capacity); /* Note: *not* the REALLOC() macro! */
+
+  if (grown == NULL)
+    {
+      capacity = newlen;
+      grown = REALLOC (b->b, capacity, char);
+    }
+
+  b->b = grown;
+  b->allocated = capacity;
+}
+
+/* Append the N bytes at P to B, growing the storage first if fewer than
+   N bytes are free.  Return a pointer to where the copy now lives inside
+   B's storage.  */
+char *
+add_buffer (struct buffer *b, const char *p, size_t n)
+{
+  size_t room = b->allocated - b->length;
+
+  if (room < n)
+    resize_buffer (b, b->length + n);
+
+  char *dest = b->b + b->length;
+  memcpy (dest, p, n);
+  b->length += n;
+
+  return dest;
+}
+
+/* Append the single character C to B and return a pointer to the stored
+   byte.  If C is EOF nothing is stored and NULL is returned.  */
+char *
+add1_buffer (struct buffer *b, int c)
+{
+  /* This special case should be kept cheap;
+   * don't make it just a mere convenience
+   * wrapper for add_buffer() -- even "builtin"
+   * versions of memcpy(a, b, 1) can become
+   * expensive when called too often.
+   */
+  if (c == EOF)
+    return NULL;
+
+  /* No free byte left: grow by at least one.  */
+  if (b->allocated == b->length)
+    resize_buffer (b, b->length + 1);
+
+  char *slot = b->b + b->length;
+  b->length++;
+  *slot = c;
+
+  return slot;
+}
+
+/* Release B together with its storage.  A NULL B is accepted and
+   ignored.  */
+void
+free_buffer (struct buffer *b)
+{
+  if (!b)
+    return;
+
+  free (b->b);
+  free (b);
+}
diff --git a/sed/utils.h b/sed/utils.h
new file mode 100644
index 0000000..810ac9f
--- /dev/null
+++ b/sed/utils.h
@@ -0,0 +1,52 @@
+/* Functions from hack's utils library.
+ Copyright (C) 1989-2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; If not, see <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+#include "basicdefs.h"
+
+/* Process exit statuses other than EXIT_SUCCESS.  */
+enum exit_codes {
+ /* EXIT_SUCCESS is already defined as 0 */
+ EXIT_BAD_USAGE = 1, /* bad program syntax, invalid command-line options */
+ EXIT_BAD_INPUT = 2, /* failed to open some of the input files */
+ EXIT_PANIC = 4 /* PANIC during program execution */
+};
+
+
+/* Print a printf-style message prefixed with the program name and exit
+   with status EXIT_PANIC.  */
+_Noreturn void panic (const char *str, ...) _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2);
+
+/* Checked stdio wrappers: each panics instead of returning an error,
+   except that ck_fopen and ck_fdopen return NULL on failure when FAIL
+   is zero.  Passing NULL to ck_fclose closes every file opened through
+   these wrappers, and stdout.  */
+FILE *ck_fopen (const char *name, const char *mode, int fail);
+FILE *ck_fdopen (int fd, const char *name, const char *mode, int fail);
+void ck_fwrite (const void *ptr, size_t size, size_t nmemb, FILE *stream);
+size_t ck_fread (void *ptr, size_t size, size_t nmemb, FILE *stream);
+void ck_fflush (FILE *stream);
+void ck_fclose (FILE *stream);
+/* Return the ultimate target of symlink PATH in a buffer the caller
+   must not free.  */
+const char *follow_symlink (const char *path);
+size_t ck_getdelim (char **text, size_t *buflen, char buffer_delimiter,
+ FILE *stream);
+/* Create a temporary file TMPDIR/BASEXXXXXX; its name is stored in
+   *P_FILENAME.  */
+FILE * ck_mkstemp (char **p_filename, const char *tmpdir, const char *base,
+ const char *mode) _GL_ARG_NONNULL ((1, 2, 3, 4));
+/* Rename FROM to TO; on failure remove UNLINK_IF_FAIL (if non-NULL)
+   and panic.  */
+void ck_rename (const char *from, const char *to, const char *unlink_if_fail);
+
+/* NOTE(review): utils.c as added alongside this header defines neither
+   of these two functions -- confirm whether another file does or
+   whether the declarations are stale.  */
+void *ck_malloc (size_t size);
+void *ck_realloc (void *ptr, size_t size);
+
+/* Growable byte buffer: create, inspect, append to, and release.  */
+struct buffer *init_buffer (void);
+char *get_buffer (struct buffer const *b) _GL_ATTRIBUTE_PURE;
+size_t size_buffer (struct buffer const *b) _GL_ATTRIBUTE_PURE;
+char *add_buffer (struct buffer *b, const char *p, size_t n);
+char *add1_buffer (struct buffer *b, int ch);
+void free_buffer (struct buffer *b);