summaryrefslogtreecommitdiff
path: root/Docs/src/bin/halibut/input.c
diff options
context:
space:
mode:
author Didier Raboud <odyx@debian.org> 2018-03-31 20:38:19 +0200
committer Didier Raboud <odyx@debian.org> 2018-03-31 20:38:19 +0200
commit f05798f0619384fdb055f634ca4233378f2779dd (patch)
tree b1f9b212f77580c824cc765ac3778fc6c8f4d4d8 /Docs/src/bin/halibut/input.c
parent 59c41c0897494001ced424157660d4ee59bb5426 (diff)
Import Upstream version 2.32
Diffstat (limited to 'Docs/src/bin/halibut/input.c')
-rwxr-xr-x Docs/src/bin/halibut/input.c 2976
1 files changed, 1488 insertions, 1488 deletions
diff --git a/Docs/src/bin/halibut/input.c b/Docs/src/bin/halibut/input.c
index f8e4f71..c14f10e 100755
--- a/Docs/src/bin/halibut/input.c
+++ b/Docs/src/bin/halibut/input.c
@@ -1,1488 +1,1488 @@
-/*
- * input.c: read the source form
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <time.h>
-#include "halibut.h"
-
-#define TAB_STOP 8 /* for column number tracking */
-
-static void setpos(input * in, char *fname)
-{
- in->pos.filename = fname;
- in->pos.line = 1;
- in->pos.col = (in->reportcols ? 1 : -1);
-}
-
-static void unget(input * in, int c, filepos * pos)
-{
- if (in->npushback >= in->pushbacksize)
- {
- in->pushbacksize = in->npushback + 16;
- in->pushback = resize(in->pushback, in->pushbacksize);
- }
- in->pushback[in->npushback].chr = c;
- in->pushback[in->npushback].pos = *pos; /* structure copy */
- in->npushback++;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * Macro subsystem
- */
-typedef struct macro_Tag macro;
-struct macro_Tag {
- wchar_t *name, *text;
-};
-struct macrostack_Tag {
- macrostack *next;
- wchar_t *text;
- int ptr, npushback;
- filepos pos;
-};
-static int macrocmp(void *av, void *bv)
-{
- macro *a = (macro *) av, *b = (macro *) bv;
- return ustrcmp(a->name, b->name);
-}
-static void
-macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
-{
- macro *m = mknew(macro);
- m->name = name;
- m->text = text;
- if (add234(macros, m) != m)
- {
- error(err_macroexists, &fpos, name);
- sfree(name);
- sfree(text);
- }
-}
-static int
-macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
-{
- macro m, *gotit;
- m.name = name;
- gotit = find234(macros, &m, NULL);
- if (gotit)
- {
- macrostack *expansion = mknew(macrostack);
- expansion->next = in->stack;
- expansion->text = gotit->text;
- expansion->pos = *pos; /* structure copy */
- expansion->ptr = 0;
- expansion->npushback = in->npushback;
- in->stack = expansion;
- return TRUE;
- } else
- return FALSE;
-}
-static void macrocleanup(tree234 * macros)
-{
- int ti;
- macro *m;
- for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++)
- {
- sfree(m->name);
- sfree(m->text);
- sfree(m);
- }
- freetree234(macros);
-}
-
-/*
- * Can return EOF
- */
-static int get(input * in, filepos * pos)
-{
- int pushbackpt = in->stack ? in->stack->npushback : 0;
- if (in->npushback > pushbackpt)
- {
- --in->npushback;
- if (pos)
- *pos = in->pushback[in->npushback].pos; /* structure copy */
- return in->pushback[in->npushback].chr;
- } else if (in->stack)
- {
- wchar_t c = in->stack->text[in->stack->ptr];
- if (in->stack->text[++in->stack->ptr] == L'\0')
- {
- macrostack *tmp = in->stack;
- in->stack = tmp->next;
- sfree(tmp);
- }
- return c;
- } else if (in->currfp)
- {
- int c = getc(in->currfp);
-
- if (c == EOF)
- {
- fclose(in->currfp);
- in->currfp = NULL;
- }
- /* Track line numbers, for error reporting */
- if (pos)
- *pos = in->pos;
- if (in->reportcols)
- {
- switch (c)
- {
- case '\t':
- in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP;
- break;
- case '\n':
- in->pos.col = 1;
- in->pos.line++;
- break;
- default:
- in->pos.col++;
- break;
- }
- } else
- {
- in->pos.col = -1;
- if (c == '\n')
- in->pos.line++;
- }
- /* FIXME: do input charmap translation. We should be returning
- * Unicode here. */
- return c;
- } else
- return EOF;
-}
-
-/*
- * Lexical analysis of source files.
- */
-typedef struct token_Tag token;
-struct token_Tag {
- int type;
- int cmd, aux;
- wchar_t *text;
- filepos pos;
-};
-enum {
- tok_eof, /* end of file */
- tok_eop, /* end of paragraph */
- tok_white, /* whitespace */
- tok_word, /* a word or word fragment */
- tok_cmd, /* \command */
- tok_lbrace, /* { */
- tok_rbrace /* } */
-};
-
-/* Halibut command keywords. */
-enum {
- c__invalid, /* invalid command */
- c__comment, /* comment command (\#) */
- c__escaped, /* escaped character */
- c__nbsp, /* nonbreaking space */
- c_A, /* appendix heading */
- c_B, /* bibliography entry */
- c_BR, /* bibliography rewrite */
- c_C, /* chapter heading */
- c_H, /* heading */
- c_I, /* invisible index mark */
- c_IM, /* index merge/rewrite */
- c_K, /* capitalised cross-reference */
- c_S, /* aux field is 0, 1, 2, ... */
- c_U, /* unnumbered-chapter heading */
- c_W, /* Web hyperlink */
- c_L, /* Relative/local hyperlink */
- c_b, /* bulletted list */
- c_c, /* code */
- c_cfg, /* configuration directive */
- c_copyright, /* copyright statement */
- c_cw, /* weak code */
- c_date, /* document processing date */
- c_define, /* macro definition */
- c_e, /* emphasis */
- c_i, /* visible index mark */
- c_ii, /* uncapitalised visible index mark */
- c_k, /* uncapitalised cross-reference */
- c_R, /* free text cross-reference */
- c_n, /* numbered list */
- c_nocite, /* bibliography trickery */
- c_preamble, /* document preamble text */
- c_q, /* quote marks */
- c_rule, /* horizontal rule */
- c_title, /* document title */
- c_u, /* aux field is char code */
- c_versionid /* document RCS id */
-};
-
-/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
-#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
-#define isnl(c) ( (c)==10 )
-#define isdec(c) ( ((c)>='0'&&(c)<='9') )
-#define fromdec(c) ( (c)-'0' )
-#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
-#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
-#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
-
-/*
- * Keyword comparison function. Like strcmp, but between a wchar_t *
- * and a char *.
- */
-static int kwcmp(wchar_t const *p, char const *q)
-{
- int i;
- do
- {
- i = *p - *q;
- }
- while (*p++ && *q++ && !i);
- return i;
-}
-
-/*
- * Match a keyword.
- */
-static void match_kw(token * tok)
-{
- /*
- * FIXME. The ids are explicit in here so as to allow long-name
- * equivalents to the various very short keywords.
- *
- * This list must be sorted, it's searched using binary search.
- */
- static const struct {
- char const *name;
- int id;
- } keywords[] = {
- {
- "#", c__comment}
- , /* comment command (\#) */
- {
- "-", c__escaped}
- , /* nonbreaking hyphen */
- {
- "A", c_A}
- , /* appendix heading */
- {
- "B", c_B}
- , /* bibliography entry */
- {
- "BR", c_BR}
- , /* bibliography rewrite */
- {
- "C", c_C}
- , /* chapter heading */
- {
- "H", c_H}
- , /* heading */
- {
- "I", c_I}
- , /* invisible index mark */
- {
- "IM", c_IM}
- , /* index merge/rewrite */
- {
- "K", c_K}
- , /* capitalised cross-reference */
- {
- "L", c_L}
- , /* Relative/local hyperlink */
- {
- "R", c_R}
- , /* free text cross-reference */
- {
- "U", c_U}
- , /* unnumbered-chapter heading */
- {
- "W", c_W}
- , /* Web hyperlink */
- {
- "\\", c__escaped}
- , /* escaped backslash (\\) */
- {
- "_", c__nbsp}
- , /* nonbreaking space (\_) */
- {
- "b", c_b}
- , /* bulletted list */
- {
- "c", c_c}
- , /* code */
- {
- "cfg", c_cfg}
- , /* configuration directive */
- {
- "copyright", c_copyright}
- , /* copyright statement */
- {
- "cw", c_cw}
- , /* weak code */
- {
- "date", c_date}
- , /* document processing date */
- {
- "define", c_define}
- , /* macro definition */
- {
- "e", c_e}
- , /* emphasis */
- {
- "i", c_i}
- , /* visible index mark */
- {
- "ii", c_ii}
- , /* uncapitalised visible index mark */
- {
- "k", c_k}
- , /* uncapitalised cross-reference */
- {
- "n", c_n}
- , /* numbered list */
- {
- "nocite", c_nocite}
- , /* bibliography trickery */
- {
- "preamble", c_preamble}
- , /* document preamble text */
- {
- "q", c_q}
- , /* quote marks */
- {
- "rule", c_rule}
- , /* horizontal rule */
- {
- "title", c_title}
- , /* document title */
- {
- "versionid", c_versionid}
- , /* document RCS id */
- {
- "{", c__escaped}
- , /* escaped lbrace (\{) */
- {
- "}", c__escaped}
- , /* escaped rbrace (\}) */
- };
- int i, j, k, c;
-
- /*
- * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
- * doesn't match correctly, we just fall through to the
- * binary-search phase.
- */
- if (tok->text[0] == 'S')
- {
- /* We expect numeric characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n;
- if (!*p)
- n = 1;
- else
- {
- n = 0;
- while (*p && isdec(*p))
- {
- n = 10 * n + fromdec(*p);
- p++;
- }
- }
- if (!*p)
- {
- tok->cmd = c_S;
- tok->aux = n;
- return;
- }
- } else if (tok->text[0] == 'u')
- {
- /* We expect hex characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n = 0;
- while (*p && ishex(*p))
- {
- n = 16 * n + fromhex(*p);
- p++;
- }
- if (!*p)
- {
- tok->cmd = c_u;
- tok->aux = n;
- return;
- }
- }
-
- i = -1;
- j = sizeof(keywords) / sizeof(*keywords);
- while (j - i > 1)
- {
- k = (i + j) / 2;
- c = kwcmp(tok->text, keywords[k].name);
- if (c < 0)
- j = k;
- else if (c > 0)
- i = k;
- else
- { /* c == 0 */
-
- tok->cmd = keywords[k].id;
- return;
- }
- }
-
- tok->cmd = c__invalid;
-}
-
-
-/*
- * Read a token from the input file, in the normal way (`normal' in
- * the sense that code paragraphs work a different way).
- */
-token get_token(input * in)
-{
- int c;
- int nls;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.cmd = c__invalid;
- ret.aux = FALSE;
- ret.text = NULL; /* default */
- c = get(in, &cpos);
- ret.pos = cpos;
- if (iswhite(c))
- { /* tok_white or tok_eop */
- nls = 0;
- do
- {
- if (isnl(c))
- nls++;
- }
- while ((c = get(in, &cpos)) != EOF && iswhite(c));
- if (c == EOF)
- {
- ret.type = tok_eof;
- return ret;
- }
- unget(in, c, &cpos);
- ret.type = (nls > 1 ? tok_eop : tok_white);
- return ret;
- } else if (c == EOF)
- { /* tok_eof */
- ret.type = tok_eof;
- return ret;
- } else if (c == '\\')
- { /* tok_cmd */
- c = get(in, &cpos);
- if (c == '-' || c == '\\' || c == '_' ||
- c == '#' || c == '{' || c == '}')
- {
- /* single-char command */
- rdadd(&rs, (wchar_t)c);
- } else if (c == 'u')
- {
- int len = 0;
- do
- {
- rdadd(&rs, (wchar_t)c);
- len++;
- c = get(in, &cpos);
- }
- while (ishex(c) && len < 5);
- unget(in, c, &cpos);
- } else if (iscmd(c))
- {
- do
- {
- rdadd(&rs, (wchar_t)c);
- c = get(in, &cpos);
- }
- while (iscmd(c));
- unget(in, c, &cpos);
- }
- /*
- * Now match the command against the list of available
- * ones.
- */
- ret.type = tok_cmd;
- ret.text = ustrdup(rs.text);
- match_kw(&ret);
- sfree(rs.text);
- return ret;
- } else if (c == '{')
- { /* tok_lbrace */
- ret.type = tok_lbrace;
- return ret;
- } else if (c == '}')
- { /* tok_rbrace */
- ret.type = tok_rbrace;
- return ret;
- } else
- { /* tok_word */
- /*
- * Read a word: the longest possible contiguous sequence of
- * things other than whitespace, backslash, braces and
- * hyphen. A hyphen terminates the word but is returned as
- * part of it; everything else is pushed back for the next
- * token. The `aux' field contains TRUE if the word ends in
- * a hyphen.
- */
- ret.aux = FALSE; /* assumed for now */
- while (1)
- {
- if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF)
- {
- /* Put back the character that caused termination */
- unget(in, c, &cpos);
- break;
- } else
- {
- rdadd(&rs, (wchar_t)c);
- if (c == '-')
- {
- ret.aux = TRUE;
- break; /* hyphen terminates word */
- }
- }
- c = get(in, &cpos);
- }
- ret.type = tok_word;
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
- }
-}
-
-/*
- * Determine whether the next input character is an open brace (for
- * telling code paragraphs from paragraphs which merely start with
- * code).
- */
-int isbrace(input * in)
-{
- int c;
- filepos cpos;
-
- c = get(in, &cpos);
- unget(in, c, &cpos);
- return (c == '{');
-}
-
-/*
- * Read the rest of a line that starts `\c'. Including nothing at
- * all (tok_word with empty text).
- */
-token get_codepar_token(input * in)
-{
- int c;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.type = tok_word;
- c = get(in, &cpos); /* expect (and discard) one space */
- ret.pos = cpos;
- if (c == ' ')
- {
- c = get(in, &cpos);
- ret.pos = cpos;
- }
- while (!isnl(c) && c != EOF)
- {
- int c2 = c;
- c = get(in, &cpos);
- /* Discard \r just before \n. */
- if (c2 != 13 || !isnl(c))
- rdadd(&rs, (wchar_t)c2);
- }
- unget(in, c, &cpos);
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
-}
-
-/*
- * Adds a new word to a linked list
- */
-static word *addword(word newword, word *** hptrptr)
-{
- word *mnewword;
- if (!hptrptr)
- return NULL;
- mnewword = mknew(word);
- *mnewword = newword; /* structure copy */
- mnewword->next = NULL;
- **hptrptr = mnewword;
- *hptrptr = &mnewword->next;
- return mnewword;
-}
-
-/*
- * Adds a new paragraph to a linked list
- */
-static paragraph *addpara(paragraph newpara, paragraph *** hptrptr)
-{
- paragraph *mnewpara = mknew(paragraph);
- *mnewpara = newpara; /* structure copy */
- mnewpara->next = NULL;
- **hptrptr = mnewpara;
- *hptrptr = &mnewpara->next;
- return mnewpara;
-}
-
-/*
- * Destructor before token is reassigned; should catch most memory
- * leaks
- */
-#define dtor(t) ( sfree(t.text) )
-
-/*
- * Reads a single file (ie until get() returns EOF)
- */
-static void read_file(paragraph *** ret, input * in, indexdata * idx)
-{
- token t;
- paragraph par;
- word wd, **whptr, **idximplicit;
- tree234 *macros;
- wchar_t utext[2], *wdtext;
- int style, spcstyle;
- int already;
- int iswhite, seenwhite;
- int type;
- struct stack_item {
- enum {
- stack_nop = 0, /* do nothing (for error recovery) */
- stack_ualt = 1, /* \u alternative */
- stack_style = 2, /* \e, \c, \cw */
- stack_idx = 4, /* \I, \i, \ii */
- stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
- } type;
- word **whptr; /* to restore from \u alternatives */
- word **idximplicit; /* to restore from \u alternatives */
- } *sitem;
- stack parsestk;
- word *indexword=NULL, *uword=NULL, *iword=NULL;
- word *idxwordlist;
- rdstring indexstr;
- int index_downcase=0, index_visible=0, indexing=0;
- const rdstring nullrs = { 0, 0, NULL };
- wchar_t uchr;
-
- t.text = NULL;
- macros = newtree234(macrocmp);
- already = FALSE;
-
- /*
- * Loop on each paragraph.
- */
- while (1)
- {
- int start_cmd = c__invalid;
- par.words = NULL;
- par.keyword = NULL;
- whptr = &par.words;
-
- /*
- * Get a token.
- */
- if (!already)
- {
- dtor(t), t = get_token(in);
- }
- already = FALSE;
- if (t.type == tok_eof)
- break;
-
- /*
- * Parse code paragraphs separately.
- */
- if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
- {
- par.type = para_Code;
- par.fpos = t.pos;
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- wd.type = word_WeakCode;
- wd.breaks = FALSE; /* shouldn't need this... */
- wd.text = ustrdup(t.text);
- wd.alt = NULL;
- wd.fpos = t.pos;
- addword(wd, &whptr);
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- /*
- * The newline after a code-paragraph line
- */
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_eop || t.type == tok_eof)
- break;
- else if (t.type != tok_cmd || t.cmd != c_c)
- {
- error(err_brokencodepara, &t.pos);
- addpara(par, ret);
- while (t.type != tok_eop) /* error recovery: */
- dtor(t), t = get_token(in); /* eat rest of paragraph */
- goto codeparabroken; /* ick, but such is life */
- }
- }
- addpara(par, ret);
- codeparabroken:
- continue;
- }
-
- while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
- {
- dtor(t), t = get_token(in);
- }
-
-
- /*
- * This token begins a paragraph. See if it's one of the
- * special commands that define a paragraph type.
- *
- * (note that \# is special in a way, and \nocite takes no
- * text)
- */
- par.type = para_Normal;
- if (t.type == tok_cmd)
- {
- int needkw=0;
- int is_macro = FALSE;
-
- par.fpos = t.pos;
- switch (t.cmd)
- {
- default:
- needkw = -1;
- break;
- case c__invalid:
- error(err_badparatype, t.text, &t.pos);
- needkw = 4;
- break;
- case c__comment:
- if (isbrace(in))
- break; /* `\#{': isn't a comment para */
- do
- {
- dtor(t), t = get_token(in);
- }
- while (t.type != tok_eop && t.type != tok_eof);
- continue; /* next paragraph */
- /*
- * `needkw' values:
- *
- * 1 -- exactly one keyword
- * 2 -- at least one keyword
- * 4 -- any number of keywords including zero
- * 8 -- at least one keyword and then nothing else
- * 16 -- nothing at all! no keywords, no body
- * 32 -- no keywords at all
- */
- case c_A:
- needkw = 2;
- par.type = para_Appendix;
- break;
- case c_B:
- needkw = 2;
- par.type = para_Biblio;
- break;
- case c_BR:
- needkw = 1;
- par.type = para_BR;
- start_cmd = c_BR;
- break;
- case c_C:
- needkw = 2;
- par.type = para_Chapter;
- break;
- case c_H:
- needkw = 2;
- par.type = para_Heading;
- par.aux = 0;
- break;
- case c_IM:
- needkw = 2;
- par.type = para_IM;
- start_cmd = c_IM;
- break;
- case c_S:
- needkw = 2;
- par.type = para_Subsect;
- par.aux = t.aux;
- break;
- case c_U:
- needkw = 32;
- par.type = para_UnnumberedChapter;
- break;
- /* For \b and \n the keyword is optional */
- case c_b:
- needkw = 4;
- par.type = para_Bullet;
- break;
- case c_n:
- needkw = 4;
- par.type = para_NumberedList;
- break;
- case c_cfg:
- needkw = 8;
- par.type = para_Config;
- start_cmd = c_cfg;
- break;
- case c_copyright:
- needkw = 32;
- par.type = para_Copyright;
- break;
- case c_define:
- is_macro = TRUE;
- needkw = 1;
- break;
- /* For \nocite the keyword is _everything_ */
- case c_nocite:
- needkw = 8;
- par.type = para_NoCite;
- break;
- case c_preamble:
- needkw = 32;
- par.type = para_Preamble;
- break;
- case c_rule:
- needkw = 16;
- par.type = para_Rule;
- break;
- case c_title:
- needkw = 32;
- par.type = para_Title;
- break;
- case c_versionid:
- needkw = 32;
- par.type = para_VersionID;
- break;
- }
-
- if (needkw > 0)
- {
- rdstring rs = { 0, 0, NULL };
- int nkeys = 0;
- filepos fp;
-
- /* Get keywords. */
- dtor(t), t = get_token(in);
- fp = t.pos;
- while (t.type == tok_lbrace)
- {
- /* This is a keyword. */
- nkeys++;
- /* FIXME: there will be bugs if anyone specifies an
- * empty keyword (\foo{}), so trap this case. */
- while (dtor(t), t = get_token(in),
- t.type == tok_word ||
- t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp) ||
- (t.type == tok_cmd && t.cmd == c__escaped))
- {
- if (t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp))
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (t.type != tok_rbrace)
- {
- error(err_kwunclosed, &t.pos);
- continue;
- }
- rdadd(&rs, 0); /* add string terminator */
- dtor(t), t = get_token(in); /* eat right brace */
- }
-
- rdadd(&rs, 0); /* add string terminator */
-
- /* See whether we have the right number of keywords. */
- if ((needkw & 48) && nkeys > 0)
- error(err_kwillegal, &fp);
- if ((needkw & 11) && nkeys == 0)
- error(err_kwexpected, &fp);
- if ((needkw & 5) && nkeys > 1)
- error(err_kwtoomany, &fp);
-
- if (is_macro)
- {
- /*
- * Macro definition. Get the rest of the line
- * as a code-paragraph token, repeatedly until
- * there's nothing more left of it. Separate
- * with newlines.
- */
- rdstring macrotext = { 0, 0, NULL };
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- if (macrotext.pos > 0)
- rdadd(&macrotext, L'\n');
- rdadds(&macrotext, t.text);
- dtor(t), t = get_token(in);
- if (t.type == tok_eop)
- break;
- }
- macrodef(macros, rs.text, macrotext.text, fp);
- continue; /* next paragraph */
- }
-
- par.keyword = rdtrim(&rs);
-
- /* Move to EOP in case of needkw==8 or 16 (no body) */
- if (needkw & 24)
- {
- /* We allow whitespace even when we expect no para body */
- while (t.type == tok_white)
- dtor(t), t = get_token(in);
- if (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- {
- error(err_bodyillegal, &t.pos);
- /* Error recovery: eat the rest of the paragraph */
- while (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_cmd)
- already = TRUE; /* inhibit get_token at top of loop */
- addpara(par, ret);
- continue; /* next paragraph */
- }
- }
- }
-
- /*
- * Now read the actual paragraph, word by word, adding to
- * the paragraph list.
- *
- * Mid-paragraph commands:
- *
- * \K \k
- * \c \cw
- * \e
- * \i \ii
- * \I
- * \u
- * \W
- * \date
- * \\ \{ \}
- */
- parsestk = stk_new();
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- indexing = FALSE;
- seenwhite = TRUE;
- while (t.type != tok_eop && t.type != tok_eof)
- {
- iswhite = FALSE;
- already = FALSE;
-
- /* Handle implicit paragraph breaks after \IM, \BR etc */
- if (start_cmd != c__invalid &&
- t.type == tok_cmd && t.cmd == start_cmd)
- {
- already = TRUE; /* inhibit get_token at top of loop */
- break;
- }
-
- if (t.type == tok_cmd && t.cmd == c__escaped)
- {
- t.type = tok_word; /* nice and simple */
- t.aux = 0; /* even if `\-' - nonbreaking! */
- }
- if (t.type == tok_cmd && t.cmd == c__nbsp)
- {
- t.type = tok_word; /* nice and simple */
- sfree(t.text);
- t.text = ustrdup(L" "); /* text is ` ' not `_' */
- t.aux = 0; /* (nonbreaking) */
- }
- switch (t.type)
- {
- case tok_white:
- if (whptr == &par.words)
- break; /* strip whitespace at start of para */
- wd.text = NULL;
- wd.type = spcstyle;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
-
- /*
- * Inhibit use of whitespace if it's (probably the
- * newline) before a repeat \IM / \BR type
- * directive.
- */
- if (start_cmd != c__invalid)
- {
- dtor(t), t = get_token(in);
- already = TRUE;
- if (t.type == tok_cmd && t.cmd == start_cmd)
- break;
- }
-
- if (indexing)
- rdadd(&indexstr, ' ');
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- iswhite = TRUE;
- break;
- case tok_word:
- if (indexing)
- rdadds(&indexstr, t.text);
- wd.type = style;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = t.aux;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &idximplicit);
- }
- break;
- case tok_lbrace:
- error(err_unexbrace, &t.pos);
- /* Error recovery: push nop */
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- break;
- case tok_rbrace:
- sitem = stk_pop(parsestk);
- if (!sitem)
- error(err_unexbrace, &t.pos);
- else
- {
- if (sitem->type & stack_ualt)
- {
- whptr = sitem->whptr;
- idximplicit = sitem->idximplicit;
- }
- if (sitem->type & stack_style)
- {
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- }
- if (sitem->type & stack_idx ) {
- indexword->text = ustrdup(indexstr.text);
- if (index_downcase)
- ustrlow(indexword->text);
- indexing = FALSE;
- rdadd(&indexstr, L'\0');
- index_merge(idx, FALSE, indexstr.text, idxwordlist);
- sfree(indexstr.text);
- }
- if (sitem->type & stack_hyper)
- {
- wd.text = NULL;
- wd.type = word_HyperEnd;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- }
- if (sitem->type & stack_quote)
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Close;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- }
- }
- sfree(sitem);
- break;
- case tok_cmd:
- switch (t.cmd)
- {
- case c__comment:
- /*
- * In-paragraph comment: \#{ balanced braces }
- *
- * Anything goes here; even tok_eop. We should
- * eat whitespace after the close brace _if_
- * there was whitespace before the \#.
- */
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- int braces = 1;
- while (braces > 0)
- {
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- braces++;
- else if (t.type == tok_rbrace)
- braces--;
- else if (t.type == tok_eof)
- {
- error(err_commenteof, &t.pos);
- break;
- }
- }
- }
- if (seenwhite)
- {
- already = TRUE;
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- iswhite = TRUE;
- already = FALSE;
- }
- }
- break;
- case c_q:
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Open;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_quote;
- stk_push(parsestk, sitem);
- }
- break;
- case c_K:
- case c_k:
- case c_R:
- case c_W:
- case c_L:
- case c_date:
- /*
- * Keyword, hyperlink, or \date. We expect a
- * left brace, some text, and then a right
- * brace. No nesting; no arguments.
- */
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (t.cmd == c_K)
- wd.type = word_UpperXref;
- else if (t.cmd == c_k)
- wd.type = word_LowerXref;
- else if (t.cmd == c_R)
- wd.type = word_FreeTextXref;
- else if (t.cmd == c_W)
- wd.type = word_HyperLink;
- else if (t.cmd == c_L)
- wd.type = word_LocalHyperLink;
- else
- wd.type = word_Normal;
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- already = TRUE;
- wdtext = ustrftime(NULL, broken);
- wd.type = style;
- } else
- {
- error(err_explbr, &t.pos);
- wdtext = NULL;
- }
- } else
- {
- rdstring rs = { 0, 0, NULL };
- while (dtor(t), t = get_token(in),
- t.type == tok_word || t.type == tok_white)
- {
- if (t.type == tok_white)
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- wdtext = ustrftime(rs.text, broken);
- wd.type = style;
- } else
- {
- wdtext = ustrdup(rs.text);
- }
- sfree(rs.text);
- if (t.type != tok_rbrace)
- {
- error(err_kwexprbr, &t.pos);
- }
- }
- wd.alt = NULL;
- wd.aux = 0;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &idximplicit);
- }
- sfree(wdtext);
- if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
- {
- /*
- * Hyperlinks are different: they then
- * expect another left brace, to begin
- * delimiting the text marked by the link.
- */
- dtor(t), t = get_token(in);
- /*
- * Special cases: \W{}\c, \W{}\e, \W{}\cw
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_hyper;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- sfree(sitem);
- } else
- {
- stk_push(parsestk, sitem);
- }
- }
- break;
- case c_c:
- case c_cw:
- case c_e:
- type = t.cmd;
- if (style != word_Normal)
- {
- error(err_nestedstyles, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- style = (type == c_c ? word_Code :
- type == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem = mknew(struct stack_item);
- sitem->type = stack_style;
- stk_push(parsestk, sitem);
- }
- break;
- case c_i:
- case c_ii:
- case c_I:
- type = t.cmd;
- if (indexing)
- {
- error(err_nestedindex, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_idx;
- dtor(t), t = get_token(in);
- /*
- * Special cases: \i\c, \i\e, \i\cw
- */
- wd.fpos = t.pos;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- sfree(sitem);
- error(err_explbr, &t.pos);
- } else
- {
- /* Add an index-reference word with no text as yet */
- wd.type = word_IndexRef;
- wd.text = NULL;
- wd.alt = NULL;
- wd.aux = 0;
- wd.breaks = FALSE;
- indexword = addword(wd, &whptr);
- /* Set up a rdstring to read the index text */
- indexstr = nullrs;
- /* Flags so that we do the Right Things with text */
- index_visible = (type != c_I);
- index_downcase = (type == c_ii);
- indexing = TRUE;
- idxwordlist = NULL;
- idximplicit = &idxwordlist;
- /* Stack item to close the indexing on exit */
- stk_push(parsestk, sitem);
- }
- break;
- case c_u:
- uchr = t.aux;
- utext[0] = uchr;
- utext[1] = 0;
- wd.type = style;
- wd.breaks = FALSE;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(utext);
- uword = addword(wd, &whptr);
- } else
- uword = NULL;
- if (indexing)
- {
- wd.text = ustrdup(utext);
- iword = addword(wd, &idximplicit);
- } else
- iword = NULL;
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- {
- /*
- * \u with a left brace. Until the brace
- * closes, all further words go on a
- * sidetrack from the main thread of the
- * paragraph.
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_ualt;
- sitem->whptr = whptr;
- sitem->idximplicit = idximplicit;
- stk_push(parsestk, sitem);
- whptr = uword ? &uword->alt : NULL;
- idximplicit = iword ? &iword->alt : NULL;
- } else
- {
- if (indexing)
- rdadd(&indexstr, uchr);
- already = TRUE;
- }
- break;
- default:
- if (!macrolookup(macros, in, t.text, &t.pos))
- error(err_badmidcmd, t.text, &t.pos);
- break;
- }
- }
- if (!already)
- dtor(t), t = get_token(in);
- seenwhite = iswhite;
- }
- /* Check the stack is empty */
- if (NULL != (sitem = stk_pop(parsestk)))
- {
- do
- {
- sfree(sitem);
- sitem = stk_pop(parsestk);
- }
- while (sitem);
- error(err_missingrbrace, &t.pos);
- }
- stk_free(parsestk);
- addpara(par, ret);
- }
-
- /*
- * We break to here rather than returning, because otherwise
- * this cleanup doesn't happen.
- */
- dtor(t);
- macrocleanup(macros);
-}
-
-paragraph *read_input(input * in, indexdata * idx)
-{
- paragraph *head = NULL;
- paragraph **hptr = &head;
-
- while (in->currindex < in->nfiles)
- {
- in->currfp = fopen(in->filenames[in->currindex], "r");
- if (in->currfp)
- {
- setpos(in, in->filenames[in->currindex]);
- read_file(&hptr, in, idx);
- }
- in->currindex++;
- }
-
- return head;
-}
+/*
+ * input.c: read the source form
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <time.h>
+#include "halibut.h"
+
+#define TAB_STOP 8 /* for column number tracking */
+
+/*
+ * Point the input's current position at the start of file `fname':
+ * line 1, and column 1 when column reporting is enabled (the column
+ * is set to -1 otherwise).
+ */
+static void setpos(input * in, char *fname)
+{
+    int startcol;
+
+    if (in->reportcols)
+        startcol = 1;
+    else
+        startcol = -1;
+
+    in->pos.line = 1;
+    in->pos.col = startcol;
+    in->pos.filename = fname;
+}
+
+/*
+ * Push character `c', together with the position it was read from,
+ * onto the end of the input's pushback array, enlarging the array
+ * first if it is full. get() hands entries back last-in first-out.
+ */
+static void unget(input * in, int c, filepos * pos)
+{
+    int slot = in->npushback;
+
+    /* No free slot: make room for 16 entries beyond those in use. */
+    if (slot >= in->pushbacksize)
+    {
+        in->pushbacksize = slot + 16;
+        in->pushback = resize(in->pushback, in->pushbacksize);
+    }
+
+    in->pushback[slot].pos = *pos;      /* structure copy */
+    in->pushback[slot].chr = c;
+    in->npushback = slot + 1;
+}
+
+/* ---------------------------------------------------------------------- */
+/*
+ * Macro subsystem
+ */
+/* One macro definition, stored in the tree ordered by macrocmp(). */
+typedef struct macro_Tag macro;
+struct macro_Tag {
+ wchar_t *name, *text; /* both freed by macrocleanup() */
+};
+/* One active macro expansion; get() reads from the top of this stack. */
+struct macrostack_Tag {
+ macrostack *next; /* expansion that was active before this one */
+ wchar_t *text; /* the macro's text (shared with the macro record, not a copy) */
+ int ptr, npushback; /* ptr: index of next char in text; npushback: pushback depth when the expansion began */
+ filepos pos; /* position of the command that invoked the macro */
+};
+/* Tree ordering function: compare two macro records by name. */
+static int macrocmp(void *av, void *bv)
+{
+    macro *lhs = av;
+    macro *rhs = bv;
+
+    return ustrcmp(lhs->name, rhs->name);
+}
+/*
+ * Define macro `name' with replacement text `text'.
+ *
+ * Ownership of both strings passes to this function: on success they
+ * are kept in the new tree element (and later released by
+ * macrocleanup()); on failure they are freed here.
+ *
+ * If add234() does not hand back the record we just built, the record
+ * was not stored (presumably because a macro of that name already
+ * exists): err_macroexists is reported at `fpos' and everything that
+ * was allocated for the rejected definition is released, including the
+ * record itself, which used to be leaked.
+ */
+static void
+macrodef(tree234 *macros, wchar_t *name, wchar_t *text, filepos fpos)
+{
+    macro *m = mknew(macro);
+
+    m->name = name;
+    m->text = text;
+    if (add234(macros, m) != m) {
+        error(err_macroexists, &fpos, name);   /* uses name: free afterwards */
+        sfree(name);
+        sfree(text);
+        sfree(m);
+    }
+}
+/*
+ * Look up `name' among the defined macros. If it exists, push a new
+ * expansion frame onto in->stack so that get() starts delivering the
+ * macro's text, and return TRUE; otherwise return FALSE and leave the
+ * input untouched. `pos' (the position of the invoking command) is
+ * recorded in the frame.
+ */
+static int
+macrolookup(tree234 *macros, input *in, wchar_t *name, filepos *pos)
+{
+    macro key, *found;
+    macrostack *frame;
+
+    key.name = name;
+    found = find234(macros, &key, NULL);
+    if (!found)
+        return FALSE;
+
+    frame = mknew(macrostack);
+    frame->text = found->text;         /* shared, not copied */
+    frame->ptr = 0;
+    frame->pos = *pos;                 /* structure copy */
+    frame->npushback = in->npushback;
+    frame->next = in->stack;
+    in->stack = frame;
+    return TRUE;
+}
+/*
+ * Free every macro record (name, text and the record itself) and then
+ * the tree that held them.
+ */
+static void macrocleanup(tree234 *macros)
+{
+    int pos = 0;
+    macro *entry;
+
+    while ((entry = (macro *) index234(macros, pos)) != NULL) {
+        sfree(entry->name);
+        sfree(entry->text);
+        sfree(entry);
+        pos++;
+    }
+    freetree234(macros);
+}
+
+/*
+ * Fetch the next input character. Can return EOF.
+ *
+ * Sources are tried in this order:
+ *
+ *  1. characters pushed back with unget() since the innermost active
+ *     macro expansion began (or all of them if no macro is active);
+ *  2. the text of the innermost active macro expansion; the frame is
+ *     popped once its last character has been delivered;
+ *  3. the current file, which is closed (and in->currfp cleared) when
+ *     end of file is reached.
+ *
+ * If `pos' is non-NULL it is always filled in: with the saved position
+ * for a pushed-back character, with the position of the invoking
+ * command for macro text, and with the current file position
+ * otherwise. Previously the macro and no-file paths left `*pos'
+ * unwritten, so callers copied an uninitialised filepos.
+ */
+static int get(input *in, filepos *pos)
+{
+    int pushbackpt = in->stack ? in->stack->npushback : 0;
+
+    if (in->npushback > pushbackpt) {
+        --in->npushback;
+        if (pos)
+            *pos = in->pushback[in->npushback].pos;    /* structure copy */
+        return in->pushback[in->npushback].chr;
+    }
+
+    if (in->stack) {
+        wchar_t c = in->stack->text[in->stack->ptr];
+
+        /* Macro text has no position of its own: report the call site. */
+        if (pos)
+            *pos = in->stack->pos;     /* structure copy */
+        if (in->stack->text[++in->stack->ptr] == L'\0') {
+            macrostack *tmp = in->stack;
+            in->stack = tmp->next;
+            sfree(tmp);
+        }
+        return c;
+    }
+
+    if (in->currfp) {
+        int c = getc(in->currfp);
+
+        if (c == EOF) {
+            fclose(in->currfp);
+            in->currfp = NULL;
+        }
+        /* Track line numbers, for error reporting */
+        if (pos)
+            *pos = in->pos;
+        if (in->reportcols) {
+            switch (c) {
+            case '\t':
+                /*
+                 * Advance to the column after the next tab stop
+                 * (columns are 1-based: 1..8 -> 9, 9..16 -> 17, ...).
+                 * The old `% TAB_STOP' form left column 1 at column 1.
+                 */
+                in->pos.col =
+                    1 + (in->pos.col + TAB_STOP - 1) / TAB_STOP * TAB_STOP;
+                break;
+            case '\n':
+                in->pos.col = 1;
+                in->pos.line++;
+                break;
+            default:
+                in->pos.col++;
+                break;
+            }
+        } else {
+            in->pos.col = -1;
+            if (c == '\n')
+                in->pos.line++;
+        }
+        /* FIXME: do input charmap translation. We should be returning
+         * Unicode here. */
+        return c;
+    }
+
+    /* No file open: still give the caller a defined position. */
+    if (pos)
+        *pos = in->pos;
+    return EOF;
+}
+
+/*
+ * Lexical analysis of source files.
+ */
+/* A lexical token, as returned by get_token() and get_codepar_token(). */
+typedef struct token_Tag token;
+struct token_Tag {
+ int type; /* one of the tok_* values below */
+ int cmd, aux; /* cmd: c_* keyword id for tok_cmd; aux: number for \S, char code for \u, TRUE for a word ending in a hyphen */
+ wchar_t *text; /* allocated text of a word or command name, or NULL; freed with dtor() */
+ filepos pos; /* position of the token's first character */
+};
+/* Token types, stored in token.type. */
+enum {
+ tok_eof, /* end of file */
+ tok_eop, /* end of paragraph */
+ tok_white, /* whitespace */
+ tok_word, /* a word or word fragment */
+ tok_cmd, /* \command */
+ tok_lbrace, /* { */
+ tok_rbrace /* } */
+};
+
+/* Halibut command keywords: the values match_kw() stores in token.cmd. */
+enum {
+ c__invalid, /* invalid command */
+ c__comment, /* comment command (\#) */
+ c__escaped, /* escaped character */
+ c__nbsp, /* nonbreaking space */
+ c_A, /* appendix heading */
+ c_B, /* bibliography entry */
+ c_BR, /* bibliography rewrite */
+ c_C, /* chapter heading */
+ c_H, /* heading */
+ c_I, /* invisible index mark */
+ c_IM, /* index merge/rewrite */
+ c_K, /* capitalised cross-reference */
+ c_S, /* \S, \S0, \S1, ...: aux field is the number after the S (1 if absent) */
+ c_U, /* unnumbered-chapter heading */
+ c_W, /* Web hyperlink */
+ c_L, /* Relative/local hyperlink */
+ c_b, /* bulleted list */
+ c_c, /* code */
+ c_cfg, /* configuration directive */
+ c_copyright, /* copyright statement */
+ c_cw, /* weak code */
+ c_date, /* document processing date */
+ c_define, /* macro definition */
+ c_e, /* emphasis */
+ c_i, /* visible index mark */
+ c_ii, /* uncapitalised visible index mark */
+ c_k, /* uncapitalised cross-reference */
+ c_R, /* free text cross-reference */
+ c_n, /* numbered list */
+ c_nocite, /* bibliography trickery */
+ c_preamble, /* document preamble text */
+ c_q, /* quote marks */
+ c_rule, /* horizontal rule */
+ c_title, /* document title */
+ c_u, /* \uXXXX: aux field is char code, parsed from the hex digits */
+ c_versionid /* document RCS id */
+};
+
+/*
+ * Character classification helpers. They work on plain character
+ * codes (ASCII values), and every one of them evaluates its argument
+ * more than once, so never pass an expression with side effects.
+ *
+ * Perhaps whitespace should be defined in a more Unicode-friendly way?
+ */
+#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
+#define isnl(c) ( (c)==10 )
+#define isdec(c) ( ((c)>='0'&&(c)<='9') )
+#define fromdec(c) ( (c)-'0' )
+#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
+/* fromhex is only meaningful when ishex(c) holds; &0xDF folds a-f onto A-F */
+#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
+/* characters allowed in a multi-character command name */
+#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
+
+/*
+ * Keyword comparison: strcmp-style ordering between a wide string `p'
+ * and a narrow string `q'. Returns zero if they are equal, otherwise
+ * the difference between the first pair of characters that differ
+ * (negative if `p' sorts first, positive if `q' does).
+ */
+static int kwcmp(wchar_t const *p, char const *q)
+{
+    for (;;) {
+        int diff = *p - *q;
+
+        /* stop at the first difference or when either string ends */
+        if (diff != 0 || *p == 0 || *q == 0)
+            return diff;
+        p++;
+        q++;
+    }
+}
+
+/*
+ * Match a keyword: work out which command tok->text names and store
+ * its id in tok->cmd (c__invalid if it is not recognised). For \S and
+ * \u the numeric argument is also stored in tok->aux.
+ */
+static void match_kw(token *tok)
+{
+    /*
+     * FIXME. The ids are explicit in here so as to allow long-name
+     * equivalents to the various very short keywords.
+     *
+     * This list must be sorted (in kwcmp order, i.e. by character
+     * code): it is searched using binary search.
+     */
+    static const struct {
+        char const *name;
+        int id;
+    } keywords[] = {
+        { "#", c__comment },            /* comment command (\#) */
+        { "-", c__escaped },            /* nonbreaking hyphen */
+        { "A", c_A },                   /* appendix heading */
+        { "B", c_B },                   /* bibliography entry */
+        { "BR", c_BR },                 /* bibliography rewrite */
+        { "C", c_C },                   /* chapter heading */
+        { "H", c_H },                   /* heading */
+        { "I", c_I },                   /* invisible index mark */
+        { "IM", c_IM },                 /* index merge/rewrite */
+        { "K", c_K },                   /* capitalised cross-reference */
+        { "L", c_L },                   /* Relative/local hyperlink */
+        { "R", c_R },                   /* free text cross-reference */
+        { "U", c_U },                   /* unnumbered-chapter heading */
+        { "W", c_W },                   /* Web hyperlink */
+        { "\\", c__escaped },           /* escaped backslash (\\) */
+        { "_", c__nbsp },               /* nonbreaking space (\_) */
+        { "b", c_b },                   /* bulleted list */
+        { "c", c_c },                   /* code */
+        { "cfg", c_cfg },               /* configuration directive */
+        { "copyright", c_copyright },   /* copyright statement */
+        { "cw", c_cw },                 /* weak code */
+        { "date", c_date },             /* document processing date */
+        { "define", c_define },         /* macro definition */
+        { "e", c_e },                   /* emphasis */
+        { "i", c_i },                   /* visible index mark */
+        { "ii", c_ii },                 /* uncapitalised visible index mark */
+        { "k", c_k },                   /* uncapitalised cross-reference */
+        { "n", c_n },                   /* numbered list */
+        { "nocite", c_nocite },         /* bibliography trickery */
+        { "preamble", c_preamble },     /* document preamble text */
+        { "q", c_q },                   /* quote marks */
+        { "rule", c_rule },             /* horizontal rule */
+        { "title", c_title },           /* document title */
+        { "versionid", c_versionid },   /* document RCS id */
+        { "{", c__escaped },            /* escaped lbrace (\{) */
+        { "}", c__escaped },            /* escaped rbrace (\}) */
+    };
+    int lo, hi;
+
+    /*
+     * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
+     * doesn't match correctly, we just fall through to the
+     * binary-search phase.
+     */
+    if (tok->text[0] == 'S') {
+        /* Decimal digits only may follow; a bare \S counts as \S1. */
+        wchar_t *p = tok->text + 1;
+        int n;
+
+        if (*p == 0) {
+            n = 1;
+        } else {
+            for (n = 0; *p && isdec(*p); p++)
+                n = 10 * n + fromdec(*p);
+        }
+        if (*p == 0) {
+            tok->cmd = c_S;
+            tok->aux = n;
+            return;
+        }
+    } else if (tok->text[0] == 'u') {
+        /* Hex digits only may follow; a bare \u gives character code 0. */
+        wchar_t *p = tok->text + 1;
+        int n;
+
+        for (n = 0; *p && ishex(*p); p++)
+            n = 16 * n + fromhex(*p);
+        if (*p == 0) {
+            tok->cmd = c_u;
+            tok->aux = n;
+            return;
+        }
+    }
+
+    /* Binary search over the inclusive range [lo, hi]. */
+    lo = 0;
+    hi = (int) (sizeof(keywords) / sizeof(*keywords)) - 1;
+    while (lo <= hi) {
+        int mid = lo + (hi - lo) / 2;
+        int cmp = kwcmp(tok->text, keywords[mid].name);
+
+        if (cmp == 0) {
+            tok->cmd = keywords[mid].id;
+            return;
+        }
+        if (cmp < 0)
+            hi = mid - 1;
+        else
+            lo = mid + 1;
+    }
+
+    tok->cmd = c__invalid;
+}
+
+
+/*
+ * Read a token from the input file, in the normal way (`normal' in
+ * the sense that code paragraphs work a different way).
+ *
+ * The returned token's `text' is freshly allocated for tok_cmd and
+ * tok_word and NULL for every other type; `pos' is the position of
+ * the token's first character.
+ */
+token get_token(input *in)
+{
+    token tk;
+    rdstring buf = { 0, 0, NULL };
+    filepos where;
+    int ch;
+
+    tk.cmd = c__invalid;
+    tk.aux = FALSE;
+    tk.text = NULL;             /* default */
+    ch = get(in, &where);
+    tk.pos = where;
+
+    if (iswhite(ch)) {
+        /*
+         * tok_white or tok_eop: swallow the whole run of whitespace.
+         * More than one newline in it means a paragraph break.
+         */
+        int newlines = 0;
+
+        for (;;) {
+            if (isnl(ch))
+                newlines++;
+            ch = get(in, &where);
+            if (ch == EOF || !iswhite(ch))
+                break;
+        }
+        if (ch == EOF) {
+            tk.type = tok_eof;
+            return tk;
+        }
+        unget(in, ch, &where);
+        tk.type = (newlines > 1 ? tok_eop : tok_white);
+        return tk;
+    }
+
+    if (ch == EOF) {
+        tk.type = tok_eof;
+        return tk;
+    }
+
+    if (ch == '\\') {
+        /* tok_cmd: collect the command name into buf */
+        ch = get(in, &where);
+        if (ch == '-' || ch == '\\' || ch == '_' ||
+            ch == '#' || ch == '{' || ch == '}') {
+            /* single-char command */
+            rdadd(&buf, (wchar_t) ch);
+        } else if (ch == 'u') {
+            /* `u' plus at most four hex digits */
+            int len = 1;
+
+            rdadd(&buf, (wchar_t) ch);
+            ch = get(in, &where);
+            while (ishex(ch) && len < 5) {
+                rdadd(&buf, (wchar_t) ch);
+                len++;
+                ch = get(in, &where);
+            }
+            unget(in, ch, &where);
+        } else if (iscmd(ch)) {
+            /* the longest run of alphanumerics */
+            do {
+                rdadd(&buf, (wchar_t) ch);
+                ch = get(in, &where);
+            } while (iscmd(ch));
+            unget(in, ch, &where);
+        }
+        /*
+         * Now match the command against the list of available
+         * ones.
+         */
+        tk.type = tok_cmd;
+        tk.text = ustrdup(buf.text);
+        match_kw(&tk);
+        sfree(buf.text);
+        return tk;
+    }
+
+    if (ch == '{') {
+        tk.type = tok_lbrace;
+        return tk;
+    }
+
+    if (ch == '}') {
+        tk.type = tok_rbrace;
+        return tk;
+    }
+
+    /*
+     * tok_word. Read a word: the longest possible contiguous sequence
+     * of things other than whitespace, backslash, braces and hyphen.
+     * A hyphen terminates the word but is returned as part of it;
+     * everything else is pushed back for the next token. The `aux'
+     * field contains TRUE if the word ends in a hyphen.
+     */
+    tk.aux = FALSE;             /* assumed for now */
+    for (;; ch = get(in, &where)) {
+        if (iswhite(ch) || ch == '{' || ch == '}' || ch == '\\' || ch == EOF) {
+            /* Put back the character that caused termination */
+            unget(in, ch, &where);
+            break;
+        }
+        rdadd(&buf, (wchar_t) ch);
+        if (ch == '-') {
+            tk.aux = TRUE;
+            break;              /* hyphen terminates word */
+        }
+    }
+    tk.type = tok_word;
+    tk.text = ustrdup(buf.text);
+    sfree(buf.text);
+    return tk;
+}
+
+/*
+ * Peek at the next input character and report whether it is an open
+ * brace, without consuming it (used for telling code paragraphs from
+ * paragraphs which merely start with code).
+ */
+int isbrace(input *in)
+{
+    filepos where;
+    int next = get(in, &where);
+
+    unget(in, next, &where);
+    return next == '{';
+}
+
+/*
+ * Read the rest of a line that starts `\c'. Including nothing at
+ * all (tok_word with empty text).
+ *
+ * One leading space, if present, is discarded. The terminating
+ * newline (or EOF) is pushed back for the next reader, and a carriage
+ * return immediately before that newline is dropped. The result is
+ * always a tok_word whose `pos' is the position of the first
+ * character kept; `cmd' and `aux' are set to the same defaults that
+ * get_token() uses, so the returned structure holds no indeterminate
+ * fields.
+ */
+token get_codepar_token(input *in)
+{
+    int c;
+    token ret;
+    rdstring rs = { 0, 0, NULL };
+    filepos cpos;
+
+    ret.type = tok_word;
+    ret.cmd = c__invalid;
+    ret.aux = FALSE;
+    c = get(in, &cpos);         /* expect (and discard) one space */
+    ret.pos = cpos;
+    if (c == ' ') {
+        c = get(in, &cpos);
+        ret.pos = cpos;
+    }
+    while (!isnl(c) && c != EOF) {
+        int c2 = c;
+        c = get(in, &cpos);
+        /* Discard \r just before \n. */
+        if (c2 != 13 || !isnl(c))
+            rdadd(&rs, (wchar_t) c2);
+    }
+    unget(in, c, &cpos);
+    ret.text = ustrdup(rs.text);
+    sfree(rs.text);
+    return ret;
+}
+
+/*
+ * Adds a new word to a linked list.
+ *
+ * `*hptrptr' is the list's tail pointer (the address of the `next' or
+ * head slot to fill); it is advanced to the new word's `next' field.
+ * Returns the newly allocated copy of `newword'.
+ *
+ * Callers set the tail pointer itself to NULL to mean "there is no
+ * list here, discard" (see the \u handling in read_file). The old
+ * guard only tested `hptrptr', which is never NULL in this file, so
+ * that state would have been dereferenced. Both levels are checked
+ * now; a discarded word has its text freed and NULL is returned.
+ */
+static word *addword(word newword, word ***hptrptr)
+{
+    word *mnewword;
+
+    if (!hptrptr || !*hptrptr) {
+        sfree(newword.text);    /* nobody else will own it */
+        return NULL;
+    }
+    mnewword = mknew(word);
+    *mnewword = newword;        /* structure copy */
+    mnewword->next = NULL;
+    **hptrptr = mnewword;
+    *hptrptr = &mnewword->next;
+    return mnewword;
+}
+
+/*
+ * Append a heap copy of `newpara' to a linked list of paragraphs.
+ * `*hptrptr' is the list's tail pointer; it is moved on to the new
+ * paragraph's `next' field. Returns the new list element.
+ */
+static paragraph *addpara(paragraph newpara, paragraph ***hptrptr)
+{
+    paragraph *node = mknew(paragraph);
+
+    *node = newpara;            /* structure copy */
+    node->next = NULL;
+
+    **hptrptr = node;           /* link in at the tail... */
+    *hptrptr = &node->next;     /* ...and advance the tail */
+    return node;
+}
+
+/*
+ * Destructor before token is reassigned; should catch most memory
+ * leaks. Frees the token's text only. The argument is parenthesised
+ * so that any expression designating a token can be passed safely.
+ */
+#define dtor(t) ( sfree((t).text) )
+
+/*
+ * Reads a single file (ie until get() returns EOF).
+ *
+ * Each paragraph parsed is appended to the list whose tail pointer is
+ * `*ret'; index terms met on the way are handed to index_merge() on
+ * `idx'. Macros created with \define live in a tree private to this
+ * call and are destroyed before it returns.
+ *
+ * Differences from the previous revision of this function:
+ *
+ *  - the error-recovery loop after a broken code paragraph and the
+ *    loop collecting a \define body now stop at end of file as well
+ *    as at end of paragraph (both used to spin for ever on a file
+ *    ending in mid-paragraph, because get_token() keeps returning
+ *    tok_eof);
+ *
+ *  - the "eat lbrace, push nop" recovery for nested styles and for
+ *    nested index marks now ends the handling of that command. It
+ *    used to fall through, read one more token, complain a second
+ *    time that a left brace was expected and drop that token (and,
+ *    for index marks, restart indexing over the live index string);
+ *
+ *  - words of a code paragraph get aux = 0 instead of an
+ *    indeterminate value.
+ */
+static void read_file(paragraph ***ret, input *in, indexdata *idx)
+{
+    token t;
+    paragraph par;
+    word wd, **whptr, **idximplicit;
+    tree234 *macros;
+    wchar_t utext[2], *wdtext;
+    int style, spcstyle;
+    int already;                /* TRUE: `t' already holds the next token */
+    int iswhite, seenwhite;
+    int type;
+    struct stack_item {
+        enum {
+            stack_nop = 0,      /* do nothing (for error recovery) */
+            stack_ualt = 1,     /* \u alternative */
+            stack_style = 2,    /* \e, \c, \cw */
+            stack_idx = 4,      /* \I, \i, \ii */
+            stack_hyper = 8,    /* \W */
+            stack_quote = 16,   /* \q */
+        } type;
+        word **whptr;           /* to restore from \u alternatives */
+        word **idximplicit;     /* to restore from \u alternatives */
+    } *sitem;
+    stack parsestk;
+    word *indexword = NULL, *uword = NULL, *iword = NULL;
+    word *idxwordlist;
+    rdstring indexstr;
+    int index_downcase = 0, index_visible = 0, indexing = 0;
+    const rdstring nullrs = { 0, 0, NULL };
+    wchar_t uchr;
+
+    t.text = NULL;
+    macros = newtree234(macrocmp);
+    already = FALSE;
+
+    /*
+     * Loop on each paragraph.
+     */
+    while (1) {
+        int start_cmd = c__invalid;
+        par.words = NULL;
+        par.keyword = NULL;
+        whptr = &par.words;
+
+        /*
+         * Get a token.
+         */
+        if (!already) {
+            dtor(t), t = get_token(in);
+        }
+        already = FALSE;
+        if (t.type == tok_eof)
+            break;
+
+        /*
+         * Parse code paragraphs separately.
+         */
+        if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
+            par.type = para_Code;
+            par.fpos = t.pos;
+            while (1) {
+                dtor(t), t = get_codepar_token(in);
+                wd.type = word_WeakCode;
+                wd.breaks = FALSE;      /* shouldn't need this... */
+                wd.text = ustrdup(t.text);
+                wd.alt = NULL;
+                wd.aux = 0;             /* was left indeterminate */
+                wd.fpos = t.pos;
+                addword(wd, &whptr);
+                dtor(t), t = get_token(in);
+                if (t.type == tok_white) {
+                    /*
+                     * The newline after a code-paragraph line
+                     */
+                    dtor(t), t = get_token(in);
+                }
+                if (t.type == tok_eop || t.type == tok_eof)
+                    break;
+                else if (t.type != tok_cmd || t.cmd != c_c) {
+                    error(err_brokencodepara, &t.pos);
+                    addpara(par, ret);
+                    /*
+                     * Error recovery: eat rest of paragraph. Must
+                     * stop at EOF too, or we would loop for ever.
+                     */
+                    while (t.type != tok_eop && t.type != tok_eof)
+                        dtor(t), t = get_token(in);
+                    goto codeparabroken;        /* ick, but such is life */
+                }
+            }
+            addpara(par, ret);
+          codeparabroken:
+            continue;
+        }
+
+        /* Expand any macros that start the paragraph. */
+        while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos)) {
+            dtor(t), t = get_token(in);
+        }
+
+
+        /*
+         * This token begins a paragraph. See if it's one of the
+         * special commands that define a paragraph type.
+         *
+         * (note that \# is special in a way, and \nocite takes no
+         * text)
+         */
+        par.type = para_Normal;
+        if (t.type == tok_cmd) {
+            int needkw = 0;
+            int is_macro = FALSE;
+
+            par.fpos = t.pos;
+            switch (t.cmd) {
+            default:
+                needkw = -1;
+                break;
+            case c__invalid:
+                error(err_badparatype, t.text, &t.pos);
+                needkw = 4;
+                break;
+            case c__comment:
+                if (isbrace(in))
+                    break;      /* `\#{': isn't a comment para */
+                do {
+                    dtor(t), t = get_token(in);
+                } while (t.type != tok_eop && t.type != tok_eof);
+                continue;       /* next paragraph */
+                /*
+                 * `needkw' values:
+                 *
+                 * 1 -- exactly one keyword
+                 * 2 -- at least one keyword
+                 * 4 -- any number of keywords including zero
+                 * 8 -- at least one keyword and then nothing else
+                 * 16 -- nothing at all! no keywords, no body
+                 * 32 -- no keywords at all
+                 */
+            case c_A:
+                needkw = 2;
+                par.type = para_Appendix;
+                break;
+            case c_B:
+                needkw = 2;
+                par.type = para_Biblio;
+                break;
+            case c_BR:
+                needkw = 1;
+                par.type = para_BR;
+                start_cmd = c_BR;
+                break;
+            case c_C:
+                needkw = 2;
+                par.type = para_Chapter;
+                break;
+            case c_H:
+                needkw = 2;
+                par.type = para_Heading;
+                par.aux = 0;
+                break;
+            case c_IM:
+                needkw = 2;
+                par.type = para_IM;
+                start_cmd = c_IM;
+                break;
+            case c_S:
+                needkw = 2;
+                par.type = para_Subsect;
+                par.aux = t.aux;
+                break;
+            case c_U:
+                needkw = 32;
+                par.type = para_UnnumberedChapter;
+                break;
+                /* For \b and \n the keyword is optional */
+            case c_b:
+                needkw = 4;
+                par.type = para_Bullet;
+                break;
+            case c_n:
+                needkw = 4;
+                par.type = para_NumberedList;
+                break;
+            case c_cfg:
+                needkw = 8;
+                par.type = para_Config;
+                start_cmd = c_cfg;
+                break;
+            case c_copyright:
+                needkw = 32;
+                par.type = para_Copyright;
+                break;
+            case c_define:
+                is_macro = TRUE;
+                needkw = 1;
+                break;
+                /* For \nocite the keyword is _everything_ */
+            case c_nocite:
+                needkw = 8;
+                par.type = para_NoCite;
+                break;
+            case c_preamble:
+                needkw = 32;
+                par.type = para_Preamble;
+                break;
+            case c_rule:
+                needkw = 16;
+                par.type = para_Rule;
+                break;
+            case c_title:
+                needkw = 32;
+                par.type = para_Title;
+                break;
+            case c_versionid:
+                needkw = 32;
+                par.type = para_VersionID;
+                break;
+            }
+
+            if (needkw > 0) {
+                rdstring rs = { 0, 0, NULL };
+                int nkeys = 0;
+                filepos fp;
+
+                /* Get keywords. */
+                dtor(t), t = get_token(in);
+                fp = t.pos;
+                while (t.type == tok_lbrace) {
+                    /* This is a keyword. */
+                    nkeys++;
+                    /* FIXME: there will be bugs if anyone specifies an
+                     * empty keyword (\foo{}), so trap this case. */
+                    while (dtor(t), t = get_token(in),
+                           t.type == tok_word ||
+                           t.type == tok_white ||
+                           (t.type == tok_cmd && t.cmd == c__nbsp) ||
+                           (t.type == tok_cmd && t.cmd == c__escaped)) {
+                        if (t.type == tok_white ||
+                            (t.type == tok_cmd && t.cmd == c__nbsp))
+                            rdadd(&rs, ' ');
+                        else
+                            rdadds(&rs, t.text);
+                    }
+                    if (t.type != tok_rbrace) {
+                        error(err_kwunclosed, &t.pos);
+                        continue;
+                    }
+                    rdadd(&rs, 0);      /* add string terminator */
+                    dtor(t), t = get_token(in); /* eat right brace */
+                }
+
+                rdadd(&rs, 0);  /* add string terminator */
+
+                /* See whether we have the right number of keywords. */
+                if ((needkw & 48) && nkeys > 0)
+                    error(err_kwillegal, &fp);
+                if ((needkw & 11) && nkeys == 0)
+                    error(err_kwexpected, &fp);
+                if ((needkw & 5) && nkeys > 1)
+                    error(err_kwtoomany, &fp);
+
+                if (is_macro) {
+                    /*
+                     * Macro definition. Get the rest of the line
+                     * as a code-paragraph token, repeatedly until
+                     * there's nothing more left of it. Separate
+                     * with newlines.
+                     */
+                    rdstring macrotext = { 0, 0, NULL };
+                    while (1) {
+                        dtor(t), t = get_codepar_token(in);
+                        if (macrotext.pos > 0)
+                            rdadd(&macrotext, L'\n');
+                        rdadds(&macrotext, t.text);
+                        dtor(t), t = get_token(in);
+                        /* EOF ends the definition too (else: endless loop) */
+                        if (t.type == tok_eop || t.type == tok_eof)
+                            break;
+                    }
+                    macrodef(macros, rs.text, macrotext.text, fp);
+                    continue;   /* next paragraph */
+                }
+
+                par.keyword = rdtrim(&rs);
+
+                /* Move to EOP in case of needkw==8 or 16 (no body) */
+                if (needkw & 24) {
+                    /* We allow whitespace even when we expect no para body */
+                    while (t.type == tok_white)
+                        dtor(t), t = get_token(in);
+                    if (t.type != tok_eop && t.type != tok_eof &&
+                        (start_cmd == c__invalid ||
+                         t.type != tok_cmd || t.cmd != start_cmd)) {
+                        error(err_bodyillegal, &t.pos);
+                        /* Error recovery: eat the rest of the paragraph */
+                        while (t.type != tok_eop && t.type != tok_eof &&
+                               (start_cmd == c__invalid ||
+                                t.type != tok_cmd || t.cmd != start_cmd))
+                            dtor(t), t = get_token(in);
+                    }
+                    if (t.type == tok_cmd)
+                        already = TRUE; /* inhibit get_token at top of loop */
+                    addpara(par, ret);
+                    continue;   /* next paragraph */
+                }
+            }
+        }
+
+        /*
+         * Now read the actual paragraph, word by word, adding to
+         * the paragraph list.
+         *
+         * Mid-paragraph commands:
+         *
+         * \K \k
+         * \c \cw
+         * \e
+         * \i \ii
+         * \I
+         * \u
+         * \W
+         * \date
+         * \\ \{ \}
+         */
+        parsestk = stk_new();
+        style = word_Normal;
+        spcstyle = word_WhiteSpace;
+        indexing = FALSE;
+        seenwhite = TRUE;
+        while (t.type != tok_eop && t.type != tok_eof) {
+            iswhite = FALSE;
+            already = FALSE;
+
+            /* Handle implicit paragraph breaks after \IM, \BR etc */
+            if (start_cmd != c__invalid &&
+                t.type == tok_cmd && t.cmd == start_cmd) {
+                already = TRUE; /* inhibit get_token at top of loop */
+                break;
+            }
+
+            if (t.type == tok_cmd && t.cmd == c__escaped) {
+                t.type = tok_word;      /* nice and simple */
+                t.aux = 0;      /* even if `\-' - nonbreaking! */
+            }
+            if (t.type == tok_cmd && t.cmd == c__nbsp) {
+                t.type = tok_word;      /* nice and simple */
+                sfree(t.text);
+                t.text = ustrdup(L" "); /* text is ` ' not `_' */
+                t.aux = 0;      /* (nonbreaking) */
+            }
+            switch (t.type) {
+            case tok_white:
+                if (whptr == &par.words)
+                    break;      /* strip whitespace at start of para */
+                wd.text = NULL;
+                wd.type = spcstyle;
+                wd.alt = NULL;
+                wd.aux = 0;
+                wd.fpos = t.pos;
+                wd.breaks = FALSE;
+
+                /*
+                 * Inhibit use of whitespace if it's (probably the
+                 * newline) before a repeat \IM / \BR type
+                 * directive.
+                 */
+                if (start_cmd != c__invalid) {
+                    dtor(t), t = get_token(in);
+                    already = TRUE;
+                    if (t.type == tok_cmd && t.cmd == start_cmd)
+                        break;
+                }
+
+                if (indexing)
+                    rdadd(&indexstr, ' ');
+                if (!indexing || index_visible)
+                    addword(wd, &whptr);
+                if (indexing)
+                    addword(wd, &idximplicit);
+                iswhite = TRUE;
+                break;
+            case tok_word:
+                if (indexing)
+                    rdadds(&indexstr, t.text);
+                wd.type = style;
+                wd.alt = NULL;
+                wd.aux = 0;
+                wd.fpos = t.pos;
+                wd.breaks = t.aux;
+                if (!indexing || index_visible) {
+                    wd.text = ustrdup(t.text);
+                    addword(wd, &whptr);
+                }
+                if (indexing) {
+                    wd.text = ustrdup(t.text);
+                    addword(wd, &idximplicit);
+                }
+                break;
+            case tok_lbrace:
+                error(err_unexbrace, &t.pos);
+                /* Error recovery: push nop */
+                sitem = mknew(struct stack_item);
+                sitem->type = stack_nop;
+                stk_push(parsestk, sitem);
+                break;
+            case tok_rbrace:
+                sitem = stk_pop(parsestk);
+                if (!sitem)
+                    error(err_unexbrace, &t.pos);
+                else {
+                    if (sitem->type & stack_ualt) {
+                        whptr = sitem->whptr;
+                        idximplicit = sitem->idximplicit;
+                    }
+                    if (sitem->type & stack_style) {
+                        style = word_Normal;
+                        spcstyle = word_WhiteSpace;
+                    }
+                    if (sitem->type & stack_idx) {
+                        indexword->text = ustrdup(indexstr.text);
+                        if (index_downcase)
+                            ustrlow(indexword->text);
+                        indexing = FALSE;
+                        rdadd(&indexstr, L'\0');
+                        index_merge(idx, FALSE, indexstr.text, idxwordlist);
+                        sfree(indexstr.text);
+                    }
+                    if (sitem->type & stack_hyper) {
+                        wd.text = NULL;
+                        wd.type = word_HyperEnd;
+                        wd.alt = NULL;
+                        wd.aux = 0;
+                        wd.fpos = t.pos;
+                        wd.breaks = FALSE;
+                        if (!indexing || index_visible)
+                            addword(wd, &whptr);
+                        if (indexing)
+                            addword(wd, &idximplicit);
+                    }
+                    if (sitem->type & stack_quote) {
+                        wd.text = NULL;
+                        wd.type = toquotestyle(style);
+                        wd.alt = NULL;
+                        wd.aux = quote_Close;
+                        wd.fpos = t.pos;
+                        wd.breaks = FALSE;
+                        if (!indexing || index_visible)
+                            addword(wd, &whptr);
+                        if (indexing) {
+                            rdadd(&indexstr, L'"');
+                            addword(wd, &idximplicit);
+                        }
+                    }
+                }
+                sfree(sitem);
+                break;
+            case tok_cmd:
+                switch (t.cmd) {
+                case c__comment:
+                    /*
+                     * In-paragraph comment: \#{ balanced braces }
+                     *
+                     * Anything goes here; even tok_eop. We should
+                     * eat whitespace after the close brace _if_
+                     * there was whitespace before the \#.
+                     */
+                    dtor(t), t = get_token(in);
+                    if (t.type != tok_lbrace) {
+                        error(err_explbr, &t.pos);
+                    } else {
+                        int braces = 1;
+                        while (braces > 0) {
+                            dtor(t), t = get_token(in);
+                            if (t.type == tok_lbrace)
+                                braces++;
+                            else if (t.type == tok_rbrace)
+                                braces--;
+                            else if (t.type == tok_eof) {
+                                error(err_commenteof, &t.pos);
+                                break;
+                            }
+                        }
+                    }
+                    if (seenwhite) {
+                        already = TRUE;
+                        dtor(t), t = get_token(in);
+                        if (t.type == tok_white) {
+                            iswhite = TRUE;
+                            already = FALSE;
+                        }
+                    }
+                    break;
+                case c_q:
+                    dtor(t), t = get_token(in);
+                    if (t.type != tok_lbrace) {
+                        error(err_explbr, &t.pos);
+                    } else {
+                        wd.text = NULL;
+                        wd.type = toquotestyle(style);
+                        wd.alt = NULL;
+                        wd.aux = quote_Open;
+                        wd.fpos = t.pos;
+                        wd.breaks = FALSE;
+                        if (!indexing || index_visible)
+                            addword(wd, &whptr);
+                        if (indexing) {
+                            rdadd(&indexstr, L'"');
+                            addword(wd, &idximplicit);
+                        }
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_quote;
+                        stk_push(parsestk, sitem);
+                    }
+                    break;
+                case c_K:
+                case c_k:
+                case c_R:
+                case c_W:
+                case c_L:
+                case c_date:
+                    /*
+                     * Keyword, hyperlink, or \date. We expect a
+                     * left brace, some text, and then a right
+                     * brace. No nesting; no arguments.
+                     */
+                    wd.fpos = t.pos;
+                    wd.breaks = FALSE;
+                    if (t.cmd == c_K)
+                        wd.type = word_UpperXref;
+                    else if (t.cmd == c_k)
+                        wd.type = word_LowerXref;
+                    else if (t.cmd == c_R)
+                        wd.type = word_FreeTextXref;
+                    else if (t.cmd == c_W)
+                        wd.type = word_HyperLink;
+                    else if (t.cmd == c_L)
+                        wd.type = word_LocalHyperLink;
+                    else
+                        wd.type = word_Normal;  /* marks \date below */
+                    dtor(t), t = get_token(in);
+                    if (t.type != tok_lbrace) {
+                        if (wd.type == word_Normal) {
+                            /* \date without a format argument */
+                            time_t thetime = time(NULL);
+                            struct tm *broken = localtime(&thetime);
+                            already = TRUE;
+                            wdtext = ustrftime(NULL, broken);
+                            wd.type = style;
+                        } else {
+                            error(err_explbr, &t.pos);
+                            wdtext = NULL;
+                        }
+                    } else {
+                        rdstring rs = { 0, 0, NULL };
+                        while (dtor(t), t = get_token(in),
+                               t.type == tok_word || t.type == tok_white) {
+                            if (t.type == tok_white)
+                                rdadd(&rs, ' ');
+                            else
+                                rdadds(&rs, t.text);
+                        }
+                        if (wd.type == word_Normal) {
+                            time_t thetime = time(NULL);
+                            struct tm *broken = localtime(&thetime);
+                            wdtext = ustrftime(rs.text, broken);
+                            wd.type = style;
+                        } else {
+                            wdtext = ustrdup(rs.text);
+                        }
+                        sfree(rs.text);
+                        if (t.type != tok_rbrace) {
+                            error(err_kwexprbr, &t.pos);
+                        }
+                    }
+                    wd.alt = NULL;
+                    wd.aux = 0;
+                    if (!indexing || index_visible) {
+                        wd.text = ustrdup(wdtext);
+                        addword(wd, &whptr);
+                    }
+                    if (indexing) {
+                        wd.text = ustrdup(wdtext);
+                        addword(wd, &idximplicit);
+                    }
+                    sfree(wdtext);
+                    if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink) {
+                        /*
+                         * Hyperlinks are different: they then
+                         * expect another left brace, to begin
+                         * delimiting the text marked by the link.
+                         */
+                        dtor(t), t = get_token(in);
+                        /*
+                         * Special cases: \W{}\c, \W{}\e, \W{}\cw
+                         */
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_hyper;
+                        if (t.type == tok_cmd &&
+                            (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+                            if (style != word_Normal)
+                                error(err_nestedstyles, &t.pos);
+                            else {
+                                style = (t.cmd == c_c ? word_Code :
+                                         t.cmd == c_cw ? word_WeakCode : word_Emph);
+                                spcstyle = tospacestyle(style);
+                                sitem->type |= stack_style;
+                            }
+                            dtor(t), t = get_token(in);
+                        }
+                        if (t.type != tok_lbrace) {
+                            error(err_explbr, &t.pos);
+                            sfree(sitem);
+                        } else {
+                            stk_push(parsestk, sitem);
+                        }
+                    }
+                    break;
+                case c_c:
+                case c_cw:
+                case c_e:
+                    type = t.cmd;
+                    if (style != word_Normal) {
+                        error(err_nestedstyles, &t.pos);
+                        /* Error recovery: eat lbrace, push nop. */
+                        dtor(t), t = get_token(in);
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_nop;
+                        stk_push(parsestk, sitem);
+                        /*
+                         * Recovery is complete: the brace is consumed
+                         * and its partner will pop the nop. Don't go
+                         * on to demand a second left brace.
+                         */
+                        break;
+                    }
+                    dtor(t), t = get_token(in);
+                    if (t.type != tok_lbrace) {
+                        error(err_explbr, &t.pos);
+                    } else {
+                        style = (type == c_c ? word_Code :
+                                 type == c_cw ? word_WeakCode : word_Emph);
+                        spcstyle = tospacestyle(style);
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_style;
+                        stk_push(parsestk, sitem);
+                    }
+                    break;
+                case c_i:
+                case c_ii:
+                case c_I:
+                    type = t.cmd;
+                    if (indexing) {
+                        error(err_nestedindex, &t.pos);
+                        /* Error recovery: eat lbrace, push nop. */
+                        dtor(t), t = get_token(in);
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_nop;
+                        stk_push(parsestk, sitem);
+                        /*
+                         * Recovery is complete; starting a second
+                         * index here would clobber the one that is
+                         * still being collected.
+                         */
+                        break;
+                    }
+                    sitem = mknew(struct stack_item);
+                    sitem->type = stack_idx;
+                    dtor(t), t = get_token(in);
+                    /*
+                     * Special cases: \i\c, \i\e, \i\cw
+                     */
+                    wd.fpos = t.pos;
+                    if (t.type == tok_cmd &&
+                        (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+                        if (style != word_Normal)
+                            error(err_nestedstyles, &t.pos);
+                        else {
+                            style = (t.cmd == c_c ? word_Code :
+                                     t.cmd == c_cw ? word_WeakCode : word_Emph);
+                            spcstyle = tospacestyle(style);
+                            sitem->type |= stack_style;
+                        }
+                        dtor(t), t = get_token(in);
+                    }
+                    if (t.type != tok_lbrace) {
+                        sfree(sitem);
+                        error(err_explbr, &t.pos);
+                    } else {
+                        /* Add an index-reference word with no text as yet */
+                        wd.type = word_IndexRef;
+                        wd.text = NULL;
+                        wd.alt = NULL;
+                        wd.aux = 0;
+                        wd.breaks = FALSE;
+                        indexword = addword(wd, &whptr);
+                        /* Set up a rdstring to read the index text */
+                        indexstr = nullrs;
+                        /* Flags so that we do the Right Things with text */
+                        index_visible = (type != c_I);
+                        index_downcase = (type == c_ii);
+                        indexing = TRUE;
+                        idxwordlist = NULL;
+                        idximplicit = &idxwordlist;
+                        /* Stack item to close the indexing on exit */
+                        stk_push(parsestk, sitem);
+                    }
+                    break;
+                case c_u:
+                    uchr = t.aux;
+                    utext[0] = uchr;
+                    utext[1] = 0;
+                    wd.type = style;
+                    wd.breaks = FALSE;
+                    wd.alt = NULL;
+                    wd.aux = 0;
+                    wd.fpos = t.pos;
+                    if (!indexing || index_visible) {
+                        wd.text = ustrdup(utext);
+                        uword = addword(wd, &whptr);
+                    } else
+                        uword = NULL;
+                    if (indexing) {
+                        wd.text = ustrdup(utext);
+                        iword = addword(wd, &idximplicit);
+                    } else
+                        iword = NULL;
+                    dtor(t), t = get_token(in);
+                    if (t.type == tok_lbrace) {
+                        /*
+                         * \u with a left brace. Until the brace
+                         * closes, all further words go on a
+                         * sidetrack from the main thread of the
+                         * paragraph.
+                         */
+                        sitem = mknew(struct stack_item);
+                        sitem->type = stack_ualt;
+                        sitem->whptr = whptr;
+                        sitem->idximplicit = idximplicit;
+                        stk_push(parsestk, sitem);
+                        whptr = uword ? &uword->alt : NULL;
+                        idximplicit = iword ? &iword->alt : NULL;
+                    } else {
+                        if (indexing)
+                            rdadd(&indexstr, uchr);
+                        already = TRUE;
+                    }
+                    break;
+                default:
+                    if (!macrolookup(macros, in, t.text, &t.pos))
+                        error(err_badmidcmd, t.text, &t.pos);
+                    break;
+                }
+            }
+            if (!already)
+                dtor(t), t = get_token(in);
+            seenwhite = iswhite;
+        }
+        /* Check the stack is empty */
+        if (NULL != (sitem = stk_pop(parsestk))) {
+            do {
+                sfree(sitem);
+                sitem = stk_pop(parsestk);
+            } while (sitem);
+            error(err_missingrbrace, &t.pos);
+        }
+        stk_free(parsestk);
+        addpara(par, ret);
+    }
+
+    /*
+     * We break to here rather than returning, because otherwise
+     * this cleanup doesn't happen.
+     */
+    dtor(t);
+    macrocleanup(macros);
+}
+
+/*
+ * Read every remaining input file, starting at in->currindex, and
+ * return the head of the combined paragraph list (NULL if nothing
+ * was read). A file that cannot be opened is skipped silently.
+ */
+paragraph *read_input(input *in, indexdata *idx)
+{
+    paragraph *head = NULL;
+    paragraph **hptr = &head;
+
+    for (; in->currindex < in->nfiles; in->currindex++) {
+        in->currfp = fopen(in->filenames[in->currindex], "r");
+        if (!in->currfp)
+            continue;
+        setpos(in, in->filenames[in->currindex]);
+        read_file(&hptr, in, idx);
+    }
+
+    return head;
+}