diff options
author | Didier Raboud <odyx@debian.org> | 2018-03-31 20:38:19 +0200 |
---|---|---|
committer | Didier Raboud <odyx@debian.org> | 2018-03-31 20:38:19 +0200 |
commit | f05798f0619384fdb055f634ca4233378f2779dd (patch) | |
tree | b1f9b212f77580c824cc765ac3778fc6c8f4d4d8 /Docs/src/bin/halibut/input.c | |
parent | 59c41c0897494001ced424157660d4ee59bb5426 (diff) |
Import Upstream version 2.32
Diffstat (limited to 'Docs/src/bin/halibut/input.c')
-rwxr-xr-x | Docs/src/bin/halibut/input.c | 2976 |
1 files changed, 1488 insertions, 1488 deletions
diff --git a/Docs/src/bin/halibut/input.c b/Docs/src/bin/halibut/input.c
index f8e4f71..c14f10e 100755
--- a/Docs/src/bin/halibut/input.c
+++ b/Docs/src/bin/halibut/input.c
@@ -1,1488 +1,1488 @@
-/*
- * input.c: read the source form
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <time.h>
-#include "halibut.h"
-
-#define TAB_STOP 8 /* for column number tracking */
-
-static void setpos(input * in, char *fname)
-{
- in->pos.filename = fname;
- in->pos.line = 1;
- in->pos.col = (in->reportcols ? 1 : -1);
-}
-
-static void unget(input * in, int c, filepos * pos)
-{
- if (in->npushback >= in->pushbacksize)
- {
- in->pushbacksize = in->npushback + 16;
- in->pushback = resize(in->pushback, in->pushbacksize);
- }
- in->pushback[in->npushback].chr = c;
- in->pushback[in->npushback].pos = *pos; /* structure copy */
- in->npushback++;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * Macro subsystem
- */
-typedef struct macro_Tag macro;
-struct macro_Tag {
- wchar_t *name, *text;
-};
-struct macrostack_Tag {
- macrostack *next;
- wchar_t *text;
- int ptr, npushback;
- filepos pos;
-};
-static int macrocmp(void *av, void *bv)
-{
- macro *a = (macro *) av, *b = (macro *) bv;
- return ustrcmp(a->name, b->name);
-}
-static void
-macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
-{
- macro *m = mknew(macro);
- m->name = name;
- m->text = text;
- if (add234(macros, m) != m)
- {
- error(err_macroexists, &fpos, name);
- sfree(name);
- sfree(text);
- }
-}
-static int
-macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
-{
- macro m, *gotit;
- m.name = name;
- gotit = find234(macros, &m, NULL);
- if (gotit)
- {
- macrostack *expansion = mknew(macrostack);
- expansion->next = in->stack;
- expansion->text = gotit->text;
- expansion->pos = *pos; /* structure copy */
- expansion->ptr = 0;
- expansion->npushback = in->npushback;
- in->stack = expansion;
- return TRUE;
- } else
- return FALSE;
-}
-static void macrocleanup(tree234 * macros)
-{
- int ti;
- macro *m;
- for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++)
- {
- sfree(m->name);
- sfree(m->text);
- sfree(m);
- }
- freetree234(macros);
-}
-
-/*
- * Can return EOF
- */
-static int get(input * in, filepos * pos)
-{
- int pushbackpt = in->stack ? in->stack->npushback : 0;
- if (in->npushback > pushbackpt)
- {
- --in->npushback;
- if (pos)
- *pos = in->pushback[in->npushback].pos; /* structure copy */
- return in->pushback[in->npushback].chr;
- } else if (in->stack)
- {
- wchar_t c = in->stack->text[in->stack->ptr];
- if (in->stack->text[++in->stack->ptr] == L'\0')
- {
- macrostack *tmp = in->stack;
- in->stack = tmp->next;
- sfree(tmp);
- }
- return c;
- } else if (in->currfp)
- {
- int c = getc(in->currfp);
-
- if (c == EOF)
- {
- fclose(in->currfp);
- in->currfp = NULL;
- }
- /* Track line numbers, for error reporting */
- if (pos)
- *pos = in->pos;
- if (in->reportcols)
- {
- switch (c)
- {
- case '\t':
- in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP;
- break;
- case '\n':
- in->pos.col = 1;
- in->pos.line++;
- break;
- default:
- in->pos.col++;
- break;
- }
- } else
- {
- in->pos.col = -1;
- if (c == '\n')
- in->pos.line++;
- }
- /* FIXME: do input charmap translation. We should be returning
- * Unicode here. */
- return c;
- } else
- return EOF;
-}
-
-/*
- * Lexical analysis of source files.
- */
-typedef struct token_Tag token;
-struct token_Tag {
- int type;
- int cmd, aux;
- wchar_t *text;
- filepos pos;
-};
-enum {
- tok_eof, /* end of file */
- tok_eop, /* end of paragraph */
- tok_white, /* whitespace */
- tok_word, /* a word or word fragment */
- tok_cmd, /* \command */
- tok_lbrace, /* { */
- tok_rbrace /* } */
-};
-
-/* Halibut command keywords. */
-enum {
- c__invalid, /* invalid command */
- c__comment, /* comment command (\#) */
- c__escaped, /* escaped character */
- c__nbsp, /* nonbreaking space */
- c_A, /* appendix heading */
- c_B, /* bibliography entry */
- c_BR, /* bibliography rewrite */
- c_C, /* chapter heading */
- c_H, /* heading */
- c_I, /* invisible index mark */
- c_IM, /* index merge/rewrite */
- c_K, /* capitalised cross-reference */
- c_S, /* aux field is 0, 1, 2, ... */
- c_U, /* unnumbered-chapter heading */
- c_W, /* Web hyperlink */
- c_L, /* Relative/local hyperlink */
- c_b, /* bulletted list */
- c_c, /* code */
- c_cfg, /* configuration directive */
- c_copyright, /* copyright statement */
- c_cw, /* weak code */
- c_date, /* document processing date */
- c_define, /* macro definition */
- c_e, /* emphasis */
- c_i, /* visible index mark */
- c_ii, /* uncapitalised visible index mark */
- c_k, /* uncapitalised cross-reference */
- c_R, /* free text cross-reference */
- c_n, /* numbered list */
- c_nocite, /* bibliography trickery */
- c_preamble, /* document preamble text */
- c_q, /* quote marks */
- c_rule, /* horizontal rule */
- c_title, /* document title */
- c_u, /* aux field is char code */
- c_versionid /* document RCS id */
-};
-
-/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
-#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
-#define isnl(c) ( (c)==10 )
-#define isdec(c) ( ((c)>='0'&&(c)<='9') )
-#define fromdec(c) ( (c)-'0' )
-#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
-#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
-#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
-
-/*
- * Keyword comparison function. Like strcmp, but between a wchar_t *
- * and a char *.
- */
-static int kwcmp(wchar_t const *p, char const *q)
-{
- int i;
- do
- {
- i = *p - *q;
- }
- while (*p++ && *q++ && !i);
- return i;
-}
-
-/*
- * Match a keyword.
- */
-static void match_kw(token * tok)
-{
- /*
- * FIXME. The ids are explicit in here so as to allow long-name
- * equivalents to the various very short keywords.
- *
- * This list must be sorted, it's searched using binary search.
- */
- static const struct {
- char const *name;
- int id;
- } keywords[] = {
- {
- "#", c__comment}
- , /* comment command (\#) */
- {
- "-", c__escaped}
- , /* nonbreaking hyphen */
- {
- "A", c_A}
- , /* appendix heading */
- {
- "B", c_B}
- , /* bibliography entry */
- {
- "BR", c_BR}
- , /* bibliography rewrite */
- {
- "C", c_C}
- , /* chapter heading */
- {
- "H", c_H}
- , /* heading */
- {
- "I", c_I}
- , /* invisible index mark */
- {
- "IM", c_IM}
- , /* index merge/rewrite */
- {
- "K", c_K}
- , /* capitalised cross-reference */
- {
- "L", c_L}
- , /* Relative/local hyperlink */
- {
- "R", c_R}
- , /* free text cross-reference */
- {
- "U", c_U}
- , /* unnumbered-chapter heading */
- {
- "W", c_W}
- , /* Web hyperlink */
- {
- "\\", c__escaped}
- , /* escaped backslash (\\) */
- {
- "_", c__nbsp}
- , /* nonbreaking space (\_) */
- {
- "b", c_b}
- , /* bulletted list */
- {
- "c", c_c}
- , /* code */
- {
- "cfg", c_cfg}
- , /* configuration directive */
- {
- "copyright", c_copyright}
- , /* copyright statement */
- {
- "cw", c_cw}
- , /* weak code */
- {
- "date", c_date}
- , /* document processing date */
- {
- "define", c_define}
- , /* macro definition */
- {
- "e", c_e}
- , /* emphasis */
- {
- "i", c_i}
- , /* visible index mark */
- {
- "ii", c_ii}
- , /* uncapitalised visible index mark */
- {
- "k", c_k}
- , /* uncapitalised cross-reference */
- {
- "n", c_n}
- , /* numbered list */
- {
- "nocite", c_nocite}
- , /* bibliography trickery */
- {
- "preamble", c_preamble}
- , /* document preamble text */
- {
- "q", c_q}
- , /* quote marks */
- {
- "rule", c_rule}
- , /* horizontal rule */
- {
- "title", c_title}
- , /* document title */
- {
- "versionid", c_versionid}
- , /* document RCS id */
- {
- "{", c__escaped}
- , /* escaped lbrace (\{) */
- {
- "}", c__escaped}
- , /* escaped rbrace (\}) */
- };
- int i, j, k, c;
-
- /*
- * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
- * doesn't match correctly, we just fall through to the
- * binary-search phase.
- */
- if (tok->text[0] == 'S')
- {
- /* We expect numeric characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n;
- if (!*p)
- n = 1;
- else
- {
- n = 0;
- while (*p && isdec(*p))
- {
- n = 10 * n + fromdec(*p);
- p++;
- }
- }
- if (!*p)
- {
- tok->cmd = c_S;
- tok->aux = n;
- return;
- }
- } else if (tok->text[0] == 'u')
- {
- /* We expect hex characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n = 0;
- while (*p && ishex(*p))
- {
- n = 16 * n + fromhex(*p);
- p++;
- }
- if (!*p)
- {
- tok->cmd = c_u;
- tok->aux = n;
- return;
- }
- }
-
- i = -1;
- j = sizeof(keywords) / sizeof(*keywords);
- while (j - i > 1)
- {
- k = (i + j) / 2;
- c = kwcmp(tok->text, keywords[k].name);
- if (c < 0)
- j = k;
- else if (c > 0)
- i = k;
- else
- { /* c == 0 */
-
- tok->cmd = keywords[k].id;
- return;
- }
- }
-
- tok->cmd = c__invalid;
-}
-
-
-/*
- * Read a token from the input file, in the normal way (`normal' in
- * the sense that code paragraphs work a different way).
- */
-token get_token(input * in)
-{
- int c;
- int nls;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.cmd = c__invalid;
- ret.aux = FALSE;
- ret.text = NULL; /* default */
- c = get(in, &cpos);
- ret.pos = cpos;
- if (iswhite(c))
- { /* tok_white or tok_eop */
- nls = 0;
- do
- {
- if (isnl(c))
- nls++;
- }
- while ((c = get(in, &cpos)) != EOF && iswhite(c));
- if (c == EOF)
- {
- ret.type = tok_eof;
- return ret;
- }
- unget(in, c, &cpos);
- ret.type = (nls > 1 ? tok_eop : tok_white);
- return ret;
- } else if (c == EOF)
- { /* tok_eof */
- ret.type = tok_eof;
- return ret;
- } else if (c == '\\')
- { /* tok_cmd */
- c = get(in, &cpos);
- if (c == '-' || c == '\\' || c == '_' ||
- c == '#' || c == '{' || c == '}')
- {
- /* single-char command */
- rdadd(&rs, (wchar_t)c);
- } else if (c == 'u')
- {
- int len = 0;
- do
- {
- rdadd(&rs, (wchar_t)c);
- len++;
- c = get(in, &cpos);
- }
- while (ishex(c) && len < 5);
- unget(in, c, &cpos);
- } else if (iscmd(c))
- {
- do
- {
- rdadd(&rs, (wchar_t)c);
- c = get(in, &cpos);
- }
- while (iscmd(c));
- unget(in, c, &cpos);
- }
- /*
- * Now match the command against the list of available
- * ones.
- */
- ret.type = tok_cmd;
- ret.text = ustrdup(rs.text);
- match_kw(&ret);
- sfree(rs.text);
- return ret;
- } else if (c == '{')
- { /* tok_lbrace */
- ret.type = tok_lbrace;
- return ret;
- } else if (c == '}')
- { /* tok_rbrace */
- ret.type = tok_rbrace;
- return ret;
- } else
- { /* tok_word */
- /*
- * Read a word: the longest possible contiguous sequence of
- * things other than whitespace, backslash, braces and
- * hyphen. A hyphen terminates the word but is returned as
- * part of it; everything else is pushed back for the next
- * token. The `aux' field contains TRUE if the word ends in
- * a hyphen.
- */
- ret.aux = FALSE; /* assumed for now */
- while (1)
- {
- if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF)
- {
- /* Put back the character that caused termination */
- unget(in, c, &cpos);
- break;
- } else
- {
- rdadd(&rs, (wchar_t)c);
- if (c == '-')
- {
- ret.aux = TRUE;
- break; /* hyphen terminates word */
- }
- }
- c = get(in, &cpos);
- }
- ret.type = tok_word;
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
- }
-}
-
-/*
- * Determine whether the next input character is an open brace (for
- * telling code paragraphs from paragraphs which merely start with
- * code).
- */
-int isbrace(input * in)
-{
- int c;
- filepos cpos;
-
- c = get(in, &cpos);
- unget(in, c, &cpos);
- return (c == '{');
-}
-
-/*
- * Read the rest of a line that starts `\c'. Including nothing at
- * all (tok_word with empty text).
- */
-token get_codepar_token(input * in)
-{
- int c;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.type = tok_word;
- c = get(in, &cpos); /* expect (and discard) one space */
- ret.pos = cpos;
- if (c == ' ')
- {
- c = get(in, &cpos);
- ret.pos = cpos;
- }
- while (!isnl(c) && c != EOF)
- {
- int c2 = c;
- c = get(in, &cpos);
- /* Discard \r just before \n. */
- if (c2 != 13 || !isnl(c))
- rdadd(&rs, (wchar_t)c2);
- }
- unget(in, c, &cpos);
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
-}
-
-/*
- * Adds a new word to a linked list
- */
-static word *addword(word newword, word *** hptrptr)
-{
- word *mnewword;
- if (!hptrptr)
- return NULL;
- mnewword = mknew(word);
- *mnewword = newword; /* structure copy */
- mnewword->next = NULL;
- **hptrptr = mnewword;
- *hptrptr = &mnewword->next;
- return mnewword;
-}
-
-/*
- * Adds a new paragraph to a linked list
- */
-static paragraph *addpara(paragraph newpara, paragraph *** hptrptr)
-{
- paragraph *mnewpara = mknew(paragraph);
- *mnewpara = newpara; /* structure copy */
- mnewpara->next = NULL;
- **hptrptr = mnewpara;
- *hptrptr = &mnewpara->next;
- return mnewpara;
-}
-
-/*
- * Destructor before token is reassigned; should catch most memory
- * leaks
- */
-#define dtor(t) ( sfree(t.text) )
-
-/*
- * Reads a single file (ie until get() returns EOF)
- */
-static void read_file(paragraph *** ret, input * in, indexdata * idx)
-{
- token t;
- paragraph par;
- word wd, **whptr, **idximplicit;
- tree234 *macros;
- wchar_t utext[2], *wdtext;
- int style, spcstyle;
- int already;
- int iswhite, seenwhite;
- int type;
- struct stack_item {
- enum {
- stack_nop = 0, /* do nothing (for error recovery) */
- stack_ualt = 1, /* \u alternative */
- stack_style = 2, /* \e, \c, \cw */
- stack_idx = 4, /* \I, \i, \ii */
- stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
- } type;
- word **whptr; /* to restore from \u alternatives */
- word **idximplicit; /* to restore from \u alternatives */
- } *sitem;
- stack parsestk;
- word *indexword=NULL, *uword=NULL, *iword=NULL;
- word *idxwordlist;
- rdstring indexstr;
- int index_downcase=0, index_visible=0, indexing=0;
- const rdstring nullrs = { 0, 0, NULL };
- wchar_t uchr;
-
- t.text = NULL;
- macros = newtree234(macrocmp);
- already = FALSE;
-
- /*
- * Loop on each paragraph.
- */
- while (1)
- {
- int start_cmd = c__invalid;
- par.words = NULL;
- par.keyword = NULL;
- whptr = &par.words;
-
- /*
- * Get a token.
- */
- if (!already)
- {
- dtor(t), t = get_token(in);
- }
- already = FALSE;
- if (t.type == tok_eof)
- break;
-
- /*
- * Parse code paragraphs separately.
- */
- if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
- {
- par.type = para_Code;
- par.fpos = t.pos;
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- wd.type = word_WeakCode;
- wd.breaks = FALSE; /* shouldn't need this... */
- wd.text = ustrdup(t.text);
- wd.alt = NULL;
- wd.fpos = t.pos;
- addword(wd, &whptr);
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- /*
- * The newline after a code-paragraph line
- */
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_eop || t.type == tok_eof)
- break;
- else if (t.type != tok_cmd || t.cmd != c_c)
- {
- error(err_brokencodepara, &t.pos);
- addpara(par, ret);
- while (t.type != tok_eop) /* error recovery: */
- dtor(t), t = get_token(in); /* eat rest of paragraph */
- goto codeparabroken; /* ick, but such is life */
- }
- }
- addpara(par, ret);
- codeparabroken:
- continue;
- }
-
- while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
- {
- dtor(t), t = get_token(in);
- }
-
-
- /*
- * This token begins a paragraph. See if it's one of the
- * special commands that define a paragraph type.
- *
- * (note that \# is special in a way, and \nocite takes no
- * text)
- */
- par.type = para_Normal;
- if (t.type == tok_cmd)
- {
- int needkw=0;
- int is_macro = FALSE;
-
- par.fpos = t.pos;
- switch (t.cmd)
- {
- default:
- needkw = -1;
- break;
- case c__invalid:
- error(err_badparatype, t.text, &t.pos);
- needkw = 4;
- break;
- case c__comment:
- if (isbrace(in))
- break; /* `\#{': isn't a comment para */
- do
- {
- dtor(t), t = get_token(in);
- }
- while (t.type != tok_eop && t.type != tok_eof);
- continue; /* next paragraph */
- /*
- * `needkw' values:
- *
- * 1 -- exactly one keyword
- * 2 -- at least one keyword
- * 4 -- any number of keywords including zero
- * 8 -- at least one keyword and then nothing else
- * 16 -- nothing at all! no keywords, no body
- * 32 -- no keywords at all
- */
- case c_A:
- needkw = 2;
- par.type = para_Appendix;
- break;
- case c_B:
- needkw = 2;
- par.type = para_Biblio;
- break;
- case c_BR:
- needkw = 1;
- par.type = para_BR;
- start_cmd = c_BR;
- break;
- case c_C:
- needkw = 2;
- par.type = para_Chapter;
- break;
- case c_H:
- needkw = 2;
- par.type = para_Heading;
- par.aux = 0;
- break;
- case c_IM:
- needkw = 2;
- par.type = para_IM;
- start_cmd = c_IM;
- break;
- case c_S:
- needkw = 2;
- par.type = para_Subsect;
- par.aux = t.aux;
- break;
- case c_U:
- needkw = 32;
- par.type = para_UnnumberedChapter;
- break;
- /* For \b and \n the keyword is optional */
- case c_b:
- needkw = 4;
- par.type = para_Bullet;
- break;
- case c_n:
- needkw = 4;
- par.type = para_NumberedList;
- break;
- case c_cfg:
- needkw = 8;
- par.type = para_Config;
- start_cmd = c_cfg;
- break;
- case c_copyright:
- needkw = 32;
- par.type = para_Copyright;
- break;
- case c_define:
- is_macro = TRUE;
- needkw = 1;
- break;
- /* For \nocite the keyword is _everything_ */
- case c_nocite:
- needkw = 8;
- par.type = para_NoCite;
- break;
- case c_preamble:
- needkw = 32;
- par.type = para_Preamble;
- break;
- case c_rule:
- needkw = 16;
- par.type = para_Rule;
- break;
- case c_title:
- needkw = 32;
- par.type = para_Title;
- break;
- case c_versionid:
- needkw = 32;
- par.type = para_VersionID;
- break;
- }
-
- if (needkw > 0)
- {
- rdstring rs = { 0, 0, NULL };
- int nkeys = 0;
- filepos fp;
-
- /* Get keywords. */
- dtor(t), t = get_token(in);
- fp = t.pos;
- while (t.type == tok_lbrace)
- {
- /* This is a keyword. */
- nkeys++;
- /* FIXME: there will be bugs if anyone specifies an
- * empty keyword (\foo{}), so trap this case. */
- while (dtor(t), t = get_token(in),
- t.type == tok_word ||
- t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp) ||
- (t.type == tok_cmd && t.cmd == c__escaped))
- {
- if (t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp))
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (t.type != tok_rbrace)
- {
- error(err_kwunclosed, &t.pos);
- continue;
- }
- rdadd(&rs, 0); /* add string terminator */
- dtor(t), t = get_token(in); /* eat right brace */
- }
-
- rdadd(&rs, 0); /* add string terminator */
-
- /* See whether we have the right number of keywords. */
- if ((needkw & 48) && nkeys > 0)
- error(err_kwillegal, &fp);
- if ((needkw & 11) && nkeys == 0)
- error(err_kwexpected, &fp);
- if ((needkw & 5) && nkeys > 1)
- error(err_kwtoomany, &fp);
-
- if (is_macro)
- {
- /*
- * Macro definition. Get the rest of the line
- * as a code-paragraph token, repeatedly until
- * there's nothing more left of it. Separate
- * with newlines.
- */
- rdstring macrotext = { 0, 0, NULL };
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- if (macrotext.pos > 0)
- rdadd(&macrotext, L'\n');
- rdadds(&macrotext, t.text);
- dtor(t), t = get_token(in);
- if (t.type == tok_eop)
- break;
- }
- macrodef(macros, rs.text, macrotext.text, fp);
- continue; /* next paragraph */
- }
-
- par.keyword = rdtrim(&rs);
-
- /* Move to EOP in case of needkw==8 or 16 (no body) */
- if (needkw & 24)
- {
- /* We allow whitespace even when we expect no para body */
- while (t.type == tok_white)
- dtor(t), t = get_token(in);
- if (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- {
- error(err_bodyillegal, &t.pos);
- /* Error recovery: eat the rest of the paragraph */
- while (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_cmd)
- already = TRUE; /* inhibit get_token at top of loop */
- addpara(par, ret);
- continue; /* next paragraph */
- }
- }
- }
-
- /*
- * Now read the actual paragraph, word by word, adding to
- * the paragraph list.
- *
- * Mid-paragraph commands:
- *
- * \K \k
- * \c \cw
- * \e
- * \i \ii
- * \I
- * \u
- * \W
- * \date
- * \\ \{ \}
- */
- parsestk = stk_new();
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- indexing = FALSE;
- seenwhite = TRUE;
- while (t.type != tok_eop && t.type != tok_eof)
- {
- iswhite = FALSE;
- already = FALSE;
-
- /* Handle implicit paragraph breaks after \IM, \BR etc */
- if (start_cmd != c__invalid &&
- t.type == tok_cmd && t.cmd == start_cmd)
- {
- already = TRUE; /* inhibit get_token at top of loop */
- break;
- }
-
- if (t.type == tok_cmd && t.cmd == c__escaped)
- {
- t.type = tok_word; /* nice and simple */
- t.aux = 0; /* even if `\-' - nonbreaking! */
- }
- if (t.type == tok_cmd && t.cmd == c__nbsp)
- {
- t.type = tok_word; /* nice and simple */
- sfree(t.text);
- t.text = ustrdup(L" "); /* text is ` ' not `_' */
- t.aux = 0; /* (nonbreaking) */
- }
- switch (t.type)
- {
- case tok_white:
- if (whptr == &par.words)
- break; /* strip whitespace at start of para */
- wd.text = NULL;
- wd.type = spcstyle;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
-
- /*
- * Inhibit use of whitespace if it's (probably the
- * newline) before a repeat \IM / \BR type
- * directive.
- */
- if (start_cmd != c__invalid)
- {
- dtor(t), t = get_token(in);
- already = TRUE;
- if (t.type == tok_cmd && t.cmd == start_cmd)
- break;
- }
-
- if (indexing)
- rdadd(&indexstr, ' ');
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- iswhite = TRUE;
- break;
- case tok_word:
- if (indexing)
- rdadds(&indexstr, t.text);
- wd.type = style;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = t.aux;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &idximplicit);
- }
- break;
- case tok_lbrace:
- error(err_unexbrace, &t.pos);
- /* Error recovery: push nop */
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- break;
- case tok_rbrace:
- sitem = stk_pop(parsestk);
- if (!sitem)
- error(err_unexbrace, &t.pos);
- else
- {
- if (sitem->type & stack_ualt)
- {
- whptr = sitem->whptr;
- idximplicit = sitem->idximplicit;
- }
- if (sitem->type & stack_style)
- {
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- }
- if (sitem->type & stack_idx ) {
- indexword->text = ustrdup(indexstr.text);
- if (index_downcase)
- ustrlow(indexword->text);
- indexing = FALSE;
- rdadd(&indexstr, L'\0');
- index_merge(idx, FALSE, indexstr.text, idxwordlist);
- sfree(indexstr.text);
- }
- if (sitem->type & stack_hyper)
- {
- wd.text = NULL;
- wd.type = word_HyperEnd;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- }
- if (sitem->type & stack_quote)
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Close;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- }
- }
- sfree(sitem);
- break;
- case tok_cmd:
- switch (t.cmd)
- {
- case c__comment:
- /*
- * In-paragraph comment: \#{ balanced braces }
- *
- * Anything goes here; even tok_eop. We should
- * eat whitespace after the close brace _if_
- * there was whitespace before the \#.
- */
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- int braces = 1;
- while (braces > 0)
- {
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- braces++;
- else if (t.type == tok_rbrace)
- braces--;
- else if (t.type == tok_eof)
- {
- error(err_commenteof, &t.pos);
- break;
- }
- }
- }
- if (seenwhite)
- {
- already = TRUE;
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- iswhite = TRUE;
- already = FALSE;
- }
- }
- break;
- case c_q:
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Open;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_quote;
- stk_push(parsestk, sitem);
- }
- break;
- case c_K:
- case c_k:
- case c_R:
- case c_W:
- case c_L:
- case c_date:
- /*
- * Keyword, hyperlink, or \date. We expect a
- * left brace, some text, and then a right
- * brace. No nesting; no arguments.
- */
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (t.cmd == c_K)
- wd.type = word_UpperXref;
- else if (t.cmd == c_k)
- wd.type = word_LowerXref;
- else if (t.cmd == c_R)
- wd.type = word_FreeTextXref;
- else if (t.cmd == c_W)
- wd.type = word_HyperLink;
- else if (t.cmd == c_L)
- wd.type = word_LocalHyperLink;
- else
- wd.type = word_Normal;
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- already = TRUE;
- wdtext = ustrftime(NULL, broken);
- wd.type = style;
- } else
- {
- error(err_explbr, &t.pos);
- wdtext = NULL;
- }
- } else
- {
- rdstring rs = { 0, 0, NULL };
- while (dtor(t), t = get_token(in),
- t.type == tok_word || t.type == tok_white)
- {
- if (t.type == tok_white)
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- wdtext = ustrftime(rs.text, broken);
- wd.type = style;
- } else
- {
- wdtext = ustrdup(rs.text);
- }
- sfree(rs.text);
- if (t.type != tok_rbrace)
- {
- error(err_kwexprbr, &t.pos);
- }
- }
- wd.alt = NULL;
- wd.aux = 0;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &idximplicit);
- }
- sfree(wdtext);
- if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
- {
- /*
- * Hyperlinks are different: they then
- * expect another left brace, to begin
- * delimiting the text marked by the link.
- */
- dtor(t), t = get_token(in);
- /*
- * Special cases: \W{}\c, \W{}\e, \W{}\cw
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_hyper;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- sfree(sitem);
- } else
- {
- stk_push(parsestk, sitem);
- }
- }
- break;
- case c_c:
- case c_cw:
- case c_e:
- type = t.cmd;
- if (style != word_Normal)
- {
- error(err_nestedstyles, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- style = (type == c_c ? word_Code :
- type == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem = mknew(struct stack_item);
- sitem->type = stack_style;
- stk_push(parsestk, sitem);
- }
- break;
- case c_i:
- case c_ii:
- case c_I:
- type = t.cmd;
- if (indexing)
- {
- error(err_nestedindex, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_idx;
- dtor(t), t = get_token(in);
- /*
- * Special cases: \i\c, \i\e, \i\cw
- */
- wd.fpos = t.pos;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- sfree(sitem);
- error(err_explbr, &t.pos);
- } else
- {
- /* Add an index-reference word with no text as yet */
- wd.type = word_IndexRef;
- wd.text = NULL;
- wd.alt = NULL;
- wd.aux = 0;
- wd.breaks = FALSE;
- indexword = addword(wd, &whptr);
- /* Set up a rdstring to read the index text */
- indexstr = nullrs;
- /* Flags so that we do the Right Things with text */
- index_visible = (type != c_I);
- index_downcase = (type == c_ii);
- indexing = TRUE;
- idxwordlist = NULL;
- idximplicit = &idxwordlist;
- /* Stack item to close the indexing on exit */
- stk_push(parsestk, sitem);
- }
- break;
- case c_u:
- uchr = t.aux;
- utext[0] = uchr;
- utext[1] = 0;
- wd.type = style;
- wd.breaks = FALSE;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(utext);
- uword = addword(wd, &whptr);
- } else
- uword = NULL;
- if (indexing)
- {
- wd.text = ustrdup(utext);
- iword = addword(wd, &idximplicit);
- } else
- iword = NULL;
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- {
- /*
- * \u with a left brace. Until the brace
- * closes, all further words go on a
- * sidetrack from the main thread of the
- * paragraph.
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_ualt;
- sitem->whptr = whptr;
- sitem->idximplicit = idximplicit;
- stk_push(parsestk, sitem);
- whptr = uword ? &uword->alt : NULL;
- idximplicit = iword ? &iword->alt : NULL;
- } else
- {
- if (indexing)
- rdadd(&indexstr, uchr);
- already = TRUE;
- }
- break;
- default:
- if (!macrolookup(macros, in, t.text, &t.pos))
- error(err_badmidcmd, t.text, &t.pos);
- break;
- }
- }
- if (!already)
- dtor(t), t = get_token(in);
- seenwhite = iswhite;
- }
- /* Check the stack is empty */
- if (NULL != (sitem = stk_pop(parsestk)))
- {
- do
- {
- sfree(sitem);
- sitem = stk_pop(parsestk);
- }
- while (sitem);
- error(err_missingrbrace, &t.pos);
- }
- stk_free(parsestk);
- addpara(par, ret);
- }
-
- /*
- * We break to here rather than returning, because otherwise
- * this cleanup doesn't happen.
- */
- dtor(t);
- macrocleanup(macros);
-}
-
-paragraph *read_input(input * in, indexdata * idx)
-{
- paragraph *head = NULL;
- paragraph **hptr = &head;
-
- while (in->currindex < in->nfiles)
- {
- in->currfp = fopen(in->filenames[in->currindex], "r");
- if (in->currfp)
- {
- setpos(in, in->filenames[in->currindex]);
- read_file(&hptr, in, idx);
- }
- in->currindex++;
- }
-
- return head;
-}
+/* + * input.c: read the source form + */ + +#include <stdio.h> +#include <assert.h> +#include <time.h> +#include "halibut.h" + +#define TAB_STOP 8 /* for column number tracking */ + +static void setpos(input * in, char *fname) +{ + in->pos.filename = fname; + in->pos.line = 1; + in->pos.col = (in->reportcols ? 1 : -1); +} + +static void unget(input * in, int c, filepos * pos) +{ + if (in->npushback >= in->pushbacksize) + { + in->pushbacksize = in->npushback + 16; + in->pushback = resize(in->pushback, in->pushbacksize); + } + in->pushback[in->npushback].chr = c; + in->pushback[in->npushback].pos = *pos; /* structure copy */ + in->npushback++; +} + +/* ---------------------------------------------------------------------- */ +/* + * Macro subsystem + */ +typedef struct macro_Tag macro; +struct macro_Tag { + wchar_t *name, *text; +}; +struct macrostack_Tag { + macrostack *next; + wchar_t *text; + int ptr, npushback; + filepos pos; +}; +static int macrocmp(void *av, void *bv) +{ + macro *a = (macro *) av, *b = (macro *) bv; + return ustrcmp(a->name, b->name); +} +static void +macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos) +{ + macro *m = mknew(macro); + m->name = name; + m->text = text; + if (add234(macros, m) != m) + { + error(err_macroexists, &fpos, name); + sfree(name); + sfree(text); + } +} +static int +macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos) +{ + macro m, *gotit; + m.name = name; + gotit = find234(macros, &m, NULL); + if (gotit) + { + macrostack *expansion = mknew(macrostack); + expansion->next = in->stack; + expansion->text = gotit->text; + expansion->pos = *pos; /* structure copy */ + expansion->ptr = 0; + expansion->npushback = in->npushback; + in->stack = expansion; + return TRUE; + } else + return FALSE; +} +static void macrocleanup(tree234 * macros) +{ + int ti; + macro *m; + for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++) + { + sfree(m->name); + sfree(m->text); + sfree(m); + } + 
freetree234(macros); +} + +/* + * Can return EOF + */ +static int get(input * in, filepos * pos) +{ + int pushbackpt = in->stack ? in->stack->npushback : 0; + if (in->npushback > pushbackpt) + { + --in->npushback; + if (pos) + *pos = in->pushback[in->npushback].pos; /* structure copy */ + return in->pushback[in->npushback].chr; + } else if (in->stack) + { + wchar_t c = in->stack->text[in->stack->ptr]; + if (in->stack->text[++in->stack->ptr] == L'\0') + { + macrostack *tmp = in->stack; + in->stack = tmp->next; + sfree(tmp); + } + return c; + } else if (in->currfp) + { + int c = getc(in->currfp); + + if (c == EOF) + { + fclose(in->currfp); + in->currfp = NULL; + } + /* Track line numbers, for error reporting */ + if (pos) + *pos = in->pos; + if (in->reportcols) + { + switch (c) + { + case '\t': + in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP; + break; + case '\n': + in->pos.col = 1; + in->pos.line++; + break; + default: + in->pos.col++; + break; + } + } else + { + in->pos.col = -1; + if (c == '\n') + in->pos.line++; + } + /* FIXME: do input charmap translation. We should be returning + * Unicode here. */ + return c; + } else + return EOF; +} + +/* + * Lexical analysis of source files. + */ +typedef struct token_Tag token; +struct token_Tag { + int type; + int cmd, aux; + wchar_t *text; + filepos pos; +}; +enum { + tok_eof, /* end of file */ + tok_eop, /* end of paragraph */ + tok_white, /* whitespace */ + tok_word, /* a word or word fragment */ + tok_cmd, /* \command */ + tok_lbrace, /* { */ + tok_rbrace /* } */ +}; + +/* Halibut command keywords. 
*/ +enum { + c__invalid, /* invalid command */ + c__comment, /* comment command (\#) */ + c__escaped, /* escaped character */ + c__nbsp, /* nonbreaking space */ + c_A, /* appendix heading */ + c_B, /* bibliography entry */ + c_BR, /* bibliography rewrite */ + c_C, /* chapter heading */ + c_H, /* heading */ + c_I, /* invisible index mark */ + c_IM, /* index merge/rewrite */ + c_K, /* capitalised cross-reference */ + c_S, /* aux field is 0, 1, 2, ... */ + c_U, /* unnumbered-chapter heading */ + c_W, /* Web hyperlink */ + c_L, /* Relative/local hyperlink */ + c_b, /* bulletted list */ + c_c, /* code */ + c_cfg, /* configuration directive */ + c_copyright, /* copyright statement */ + c_cw, /* weak code */ + c_date, /* document processing date */ + c_define, /* macro definition */ + c_e, /* emphasis */ + c_i, /* visible index mark */ + c_ii, /* uncapitalised visible index mark */ + c_k, /* uncapitalised cross-reference */ + c_R, /* free text cross-reference */ + c_n, /* numbered list */ + c_nocite, /* bibliography trickery */ + c_preamble, /* document preamble text */ + c_q, /* quote marks */ + c_rule, /* horizontal rule */ + c_title, /* document title */ + c_u, /* aux field is char code */ + c_versionid /* document RCS id */ +}; + +/* Perhaps whitespace should be defined in a more Unicode-friendly way? */ +#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 ) +#define isnl(c) ( (c)==10 ) +#define isdec(c) ( ((c)>='0'&&(c)<='9') ) +#define fromdec(c) ( (c)-'0' ) +#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f')) +#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) ) +#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z')) + +/* + * Keyword comparison function. Like strcmp, but between a wchar_t * + * and a char *. + */ +static int kwcmp(wchar_t const *p, char const *q) +{ + int i; + do + { + i = *p - *q; + } + while (*p++ && *q++ && !i); + return i; +} + +/* + * Match a keyword. 
+ */ +static void match_kw(token * tok) +{ + /* + * FIXME. The ids are explicit in here so as to allow long-name + * equivalents to the various very short keywords. + * + * This list must be sorted, it's searched using binary search. + */ + static const struct { + char const *name; + int id; + } keywords[] = { + { + "#", c__comment} + , /* comment command (\#) */ + { + "-", c__escaped} + , /* nonbreaking hyphen */ + { + "A", c_A} + , /* appendix heading */ + { + "B", c_B} + , /* bibliography entry */ + { + "BR", c_BR} + , /* bibliography rewrite */ + { + "C", c_C} + , /* chapter heading */ + { + "H", c_H} + , /* heading */ + { + "I", c_I} + , /* invisible index mark */ + { + "IM", c_IM} + , /* index merge/rewrite */ + { + "K", c_K} + , /* capitalised cross-reference */ + { + "L", c_L} + , /* Relative/local hyperlink */ + { + "R", c_R} + , /* free text cross-reference */ + { + "U", c_U} + , /* unnumbered-chapter heading */ + { + "W", c_W} + , /* Web hyperlink */ + { + "\\", c__escaped} + , /* escaped backslash (\\) */ + { + "_", c__nbsp} + , /* nonbreaking space (\_) */ + { + "b", c_b} + , /* bulletted list */ + { + "c", c_c} + , /* code */ + { + "cfg", c_cfg} + , /* configuration directive */ + { + "copyright", c_copyright} + , /* copyright statement */ + { + "cw", c_cw} + , /* weak code */ + { + "date", c_date} + , /* document processing date */ + { + "define", c_define} + , /* macro definition */ + { + "e", c_e} + , /* emphasis */ + { + "i", c_i} + , /* visible index mark */ + { + "ii", c_ii} + , /* uncapitalised visible index mark */ + { + "k", c_k} + , /* uncapitalised cross-reference */ + { + "n", c_n} + , /* numbered list */ + { + "nocite", c_nocite} + , /* bibliography trickery */ + { + "preamble", c_preamble} + , /* document preamble text */ + { + "q", c_q} + , /* quote marks */ + { + "rule", c_rule} + , /* horizontal rule */ + { + "title", c_title} + , /* document title */ + { + "versionid", c_versionid} + , /* document RCS id */ + { + "{", c__escaped} + , 
/* escaped lbrace (\{) */ + { + "}", c__escaped} + , /* escaped rbrace (\}) */ + }; + int i, j, k, c; + + /* + * Special cases: \S{0,1,2,...} and \uABCD. If the syntax + * doesn't match correctly, we just fall through to the + * binary-search phase. + */ + if (tok->text[0] == 'S') + { + /* We expect numeric characters thereafter. */ + wchar_t *p = tok->text + 1; + int n; + if (!*p) + n = 1; + else + { + n = 0; + while (*p && isdec(*p)) + { + n = 10 * n + fromdec(*p); + p++; + } + } + if (!*p) + { + tok->cmd = c_S; + tok->aux = n; + return; + } + } else if (tok->text[0] == 'u') + { + /* We expect hex characters thereafter. */ + wchar_t *p = tok->text + 1; + int n = 0; + while (*p && ishex(*p)) + { + n = 16 * n + fromhex(*p); + p++; + } + if (!*p) + { + tok->cmd = c_u; + tok->aux = n; + return; + } + } + + i = -1; + j = sizeof(keywords) / sizeof(*keywords); + while (j - i > 1) + { + k = (i + j) / 2; + c = kwcmp(tok->text, keywords[k].name); + if (c < 0) + j = k; + else if (c > 0) + i = k; + else + { /* c == 0 */ + + tok->cmd = keywords[k].id; + return; + } + } + + tok->cmd = c__invalid; +} + + +/* + * Read a token from the input file, in the normal way (`normal' in + * the sense that code paragraphs work a different way). + */ +token get_token(input * in) +{ + int c; + int nls; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.cmd = c__invalid; + ret.aux = FALSE; + ret.text = NULL; /* default */ + c = get(in, &cpos); + ret.pos = cpos; + if (iswhite(c)) + { /* tok_white or tok_eop */ + nls = 0; + do + { + if (isnl(c)) + nls++; + } + while ((c = get(in, &cpos)) != EOF && iswhite(c)); + if (c == EOF) + { + ret.type = tok_eof; + return ret; + } + unget(in, c, &cpos); + ret.type = (nls > 1 ? 
tok_eop : tok_white); + return ret; + } else if (c == EOF) + { /* tok_eof */ + ret.type = tok_eof; + return ret; + } else if (c == '\\') + { /* tok_cmd */ + c = get(in, &cpos); + if (c == '-' || c == '\\' || c == '_' || + c == '#' || c == '{' || c == '}') + { + /* single-char command */ + rdadd(&rs, (wchar_t)c); + } else if (c == 'u') + { + int len = 0; + do + { + rdadd(&rs, (wchar_t)c); + len++; + c = get(in, &cpos); + } + while (ishex(c) && len < 5); + unget(in, c, &cpos); + } else if (iscmd(c)) + { + do + { + rdadd(&rs, (wchar_t)c); + c = get(in, &cpos); + } + while (iscmd(c)); + unget(in, c, &cpos); + } + /* + * Now match the command against the list of available + * ones. + */ + ret.type = tok_cmd; + ret.text = ustrdup(rs.text); + match_kw(&ret); + sfree(rs.text); + return ret; + } else if (c == '{') + { /* tok_lbrace */ + ret.type = tok_lbrace; + return ret; + } else if (c == '}') + { /* tok_rbrace */ + ret.type = tok_rbrace; + return ret; + } else + { /* tok_word */ + /* + * Read a word: the longest possible contiguous sequence of + * things other than whitespace, backslash, braces and + * hyphen. A hyphen terminates the word but is returned as + * part of it; everything else is pushed back for the next + * token. The `aux' field contains TRUE if the word ends in + * a hyphen. + */ + ret.aux = FALSE; /* assumed for now */ + while (1) + { + if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF) + { + /* Put back the character that caused termination */ + unget(in, c, &cpos); + break; + } else + { + rdadd(&rs, (wchar_t)c); + if (c == '-') + { + ret.aux = TRUE; + break; /* hyphen terminates word */ + } + } + c = get(in, &cpos); + } + ret.type = tok_word; + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; + } +} + +/* + * Determine whether the next input character is an open brace (for + * telling code paragraphs from paragraphs which merely start with + * code). 
+ */ +int isbrace(input * in) +{ + int c; + filepos cpos; + + c = get(in, &cpos); + unget(in, c, &cpos); + return (c == '{'); +} + +/* + * Read the rest of a line that starts `\c'. Including nothing at + * all (tok_word with empty text). + */ +token get_codepar_token(input * in) +{ + int c; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.type = tok_word; + c = get(in, &cpos); /* expect (and discard) one space */ + ret.pos = cpos; + if (c == ' ') + { + c = get(in, &cpos); + ret.pos = cpos; + } + while (!isnl(c) && c != EOF) + { + int c2 = c; + c = get(in, &cpos); + /* Discard \r just before \n. */ + if (c2 != 13 || !isnl(c)) + rdadd(&rs, (wchar_t)c2); + } + unget(in, c, &cpos); + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; +} + +/* + * Adds a new word to a linked list + */ +static word *addword(word newword, word *** hptrptr) +{ + word *mnewword; + if (!hptrptr) + return NULL; + mnewword = mknew(word); + *mnewword = newword; /* structure copy */ + mnewword->next = NULL; + **hptrptr = mnewword; + *hptrptr = &mnewword->next; + return mnewword; +} + +/* + * Adds a new paragraph to a linked list + */ +static paragraph *addpara(paragraph newpara, paragraph *** hptrptr) +{ + paragraph *mnewpara = mknew(paragraph); + *mnewpara = newpara; /* structure copy */ + mnewpara->next = NULL; + **hptrptr = mnewpara; + *hptrptr = &mnewpara->next; + return mnewpara; +} + +/* + * Destructor before token is reassigned; should catch most memory + * leaks + */ +#define dtor(t) ( sfree(t.text) ) + +/* + * Reads a single file (ie until get() returns EOF) + */ +static void read_file(paragraph *** ret, input * in, indexdata * idx) +{ + token t; + paragraph par; + word wd, **whptr, **idximplicit; + tree234 *macros; + wchar_t utext[2], *wdtext; + int style, spcstyle; + int already; + int iswhite, seenwhite; + int type; + struct stack_item { + enum { + stack_nop = 0, /* do nothing (for error recovery) */ + stack_ualt = 1, /* \u alternative */ + stack_style = 
2, /* \e, \c, \cw */ + stack_idx = 4, /* \I, \i, \ii */ + stack_hyper = 8, /* \W */ + stack_quote = 16, /* \q */ + } type; + word **whptr; /* to restore from \u alternatives */ + word **idximplicit; /* to restore from \u alternatives */ + } *sitem; + stack parsestk; + word *indexword=NULL, *uword=NULL, *iword=NULL; + word *idxwordlist; + rdstring indexstr; + int index_downcase=0, index_visible=0, indexing=0; + const rdstring nullrs = { 0, 0, NULL }; + wchar_t uchr; + + t.text = NULL; + macros = newtree234(macrocmp); + already = FALSE; + + /* + * Loop on each paragraph. + */ + while (1) + { + int start_cmd = c__invalid; + par.words = NULL; + par.keyword = NULL; + whptr = &par.words; + + /* + * Get a token. + */ + if (!already) + { + dtor(t), t = get_token(in); + } + already = FALSE; + if (t.type == tok_eof) + break; + + /* + * Parse code paragraphs separately. + */ + if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) + { + par.type = para_Code; + par.fpos = t.pos; + while (1) + { + dtor(t), t = get_codepar_token(in); + wd.type = word_WeakCode; + wd.breaks = FALSE; /* shouldn't need this... */ + wd.text = ustrdup(t.text); + wd.alt = NULL; + wd.fpos = t.pos; + addword(wd, &whptr); + dtor(t), t = get_token(in); + if (t.type == tok_white) + { + /* + * The newline after a code-paragraph line + */ + dtor(t), t = get_token(in); + } + if (t.type == tok_eop || t.type == tok_eof) + break; + else if (t.type != tok_cmd || t.cmd != c_c) + { + error(err_brokencodepara, &t.pos); + addpara(par, ret); + while (t.type != tok_eop) /* error recovery: */ + dtor(t), t = get_token(in); /* eat rest of paragraph */ + goto codeparabroken; /* ick, but such is life */ + } + } + addpara(par, ret); + codeparabroken: + continue; + } + + while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos)) + { + dtor(t), t = get_token(in); + } + + + /* + * This token begins a paragraph. See if it's one of the + * special commands that define a paragraph type. 
+ * + * (note that \# is special in a way, and \nocite takes no + * text) + */ + par.type = para_Normal; + if (t.type == tok_cmd) + { + int needkw=0; + int is_macro = FALSE; + + par.fpos = t.pos; + switch (t.cmd) + { + default: + needkw = -1; + break; + case c__invalid: + error(err_badparatype, t.text, &t.pos); + needkw = 4; + break; + case c__comment: + if (isbrace(in)) + break; /* `\#{': isn't a comment para */ + do + { + dtor(t), t = get_token(in); + } + while (t.type != tok_eop && t.type != tok_eof); + continue; /* next paragraph */ + /* + * `needkw' values: + * + * 1 -- exactly one keyword + * 2 -- at least one keyword + * 4 -- any number of keywords including zero + * 8 -- at least one keyword and then nothing else + * 16 -- nothing at all! no keywords, no body + * 32 -- no keywords at all + */ + case c_A: + needkw = 2; + par.type = para_Appendix; + break; + case c_B: + needkw = 2; + par.type = para_Biblio; + break; + case c_BR: + needkw = 1; + par.type = para_BR; + start_cmd = c_BR; + break; + case c_C: + needkw = 2; + par.type = para_Chapter; + break; + case c_H: + needkw = 2; + par.type = para_Heading; + par.aux = 0; + break; + case c_IM: + needkw = 2; + par.type = para_IM; + start_cmd = c_IM; + break; + case c_S: + needkw = 2; + par.type = para_Subsect; + par.aux = t.aux; + break; + case c_U: + needkw = 32; + par.type = para_UnnumberedChapter; + break; + /* For \b and \n the keyword is optional */ + case c_b: + needkw = 4; + par.type = para_Bullet; + break; + case c_n: + needkw = 4; + par.type = para_NumberedList; + break; + case c_cfg: + needkw = 8; + par.type = para_Config; + start_cmd = c_cfg; + break; + case c_copyright: + needkw = 32; + par.type = para_Copyright; + break; + case c_define: + is_macro = TRUE; + needkw = 1; + break; + /* For \nocite the keyword is _everything_ */ + case c_nocite: + needkw = 8; + par.type = para_NoCite; + break; + case c_preamble: + needkw = 32; + par.type = para_Preamble; + break; + case c_rule: + needkw = 16; + 
par.type = para_Rule; + break; + case c_title: + needkw = 32; + par.type = para_Title; + break; + case c_versionid: + needkw = 32; + par.type = para_VersionID; + break; + } + + if (needkw > 0) + { + rdstring rs = { 0, 0, NULL }; + int nkeys = 0; + filepos fp; + + /* Get keywords. */ + dtor(t), t = get_token(in); + fp = t.pos; + while (t.type == tok_lbrace) + { + /* This is a keyword. */ + nkeys++; + /* FIXME: there will be bugs if anyone specifies an + * empty keyword (\foo{}), so trap this case. */ + while (dtor(t), t = get_token(in), + t.type == tok_word || + t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp) || + (t.type == tok_cmd && t.cmd == c__escaped)) + { + if (t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp)) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (t.type != tok_rbrace) + { + error(err_kwunclosed, &t.pos); + continue; + } + rdadd(&rs, 0); /* add string terminator */ + dtor(t), t = get_token(in); /* eat right brace */ + } + + rdadd(&rs, 0); /* add string terminator */ + + /* See whether we have the right number of keywords. */ + if ((needkw & 48) && nkeys > 0) + error(err_kwillegal, &fp); + if ((needkw & 11) && nkeys == 0) + error(err_kwexpected, &fp); + if ((needkw & 5) && nkeys > 1) + error(err_kwtoomany, &fp); + + if (is_macro) + { + /* + * Macro definition. Get the rest of the line + * as a code-paragraph token, repeatedly until + * there's nothing more left of it. Separate + * with newlines. 
+ */ + rdstring macrotext = { 0, 0, NULL }; + while (1) + { + dtor(t), t = get_codepar_token(in); + if (macrotext.pos > 0) + rdadd(¯otext, L'\n'); + rdadds(¯otext, t.text); + dtor(t), t = get_token(in); + if (t.type == tok_eop) + break; + } + macrodef(macros, rs.text, macrotext.text, fp); + continue; /* next paragraph */ + } + + par.keyword = rdtrim(&rs); + + /* Move to EOP in case of needkw==8 or 16 (no body) */ + if (needkw & 24) + { + /* We allow whitespace even when we expect no para body */ + while (t.type == tok_white) + dtor(t), t = get_token(in); + if (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + { + error(err_bodyillegal, &t.pos); + /* Error recovery: eat the rest of the paragraph */ + while (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + dtor(t), t = get_token(in); + } + if (t.type == tok_cmd) + already = TRUE; /* inhibit get_token at top of loop */ + addpara(par, ret); + continue; /* next paragraph */ + } + } + } + + /* + * Now read the actual paragraph, word by word, adding to + * the paragraph list. + * + * Mid-paragraph commands: + * + * \K \k + * \c \cw + * \e + * \i \ii + * \I + * \u + * \W + * \date + * \\ \{ \} + */ + parsestk = stk_new(); + style = word_Normal; + spcstyle = word_WhiteSpace; + indexing = FALSE; + seenwhite = TRUE; + while (t.type != tok_eop && t.type != tok_eof) + { + iswhite = FALSE; + already = FALSE; + + /* Handle implicit paragraph breaks after \IM, \BR etc */ + if (start_cmd != c__invalid && + t.type == tok_cmd && t.cmd == start_cmd) + { + already = TRUE; /* inhibit get_token at top of loop */ + break; + } + + if (t.type == tok_cmd && t.cmd == c__escaped) + { + t.type = tok_word; /* nice and simple */ + t.aux = 0; /* even if `\-' - nonbreaking! 
*/ + } + if (t.type == tok_cmd && t.cmd == c__nbsp) + { + t.type = tok_word; /* nice and simple */ + sfree(t.text); + t.text = ustrdup(L" "); /* text is ` ' not `_' */ + t.aux = 0; /* (nonbreaking) */ + } + switch (t.type) + { + case tok_white: + if (whptr == &par.words) + break; /* strip whitespace at start of para */ + wd.text = NULL; + wd.type = spcstyle; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + + /* + * Inhibit use of whitespace if it's (probably the + * newline) before a repeat \IM / \BR type + * directive. + */ + if (start_cmd != c__invalid) + { + dtor(t), t = get_token(in); + already = TRUE; + if (t.type == tok_cmd && t.cmd == start_cmd) + break; + } + + if (indexing) + rdadd(&indexstr, ' '); + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + iswhite = TRUE; + break; + case tok_word: + if (indexing) + rdadds(&indexstr, t.text); + wd.type = style; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = t.aux; + if (!indexing || index_visible) + { + wd.text = ustrdup(t.text); + addword(wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup(t.text); + addword(wd, &idximplicit); + } + break; + case tok_lbrace: + error(err_unexbrace, &t.pos); + /* Error recovery: push nop */ + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + break; + case tok_rbrace: + sitem = stk_pop(parsestk); + if (!sitem) + error(err_unexbrace, &t.pos); + else + { + if (sitem->type & stack_ualt) + { + whptr = sitem->whptr; + idximplicit = sitem->idximplicit; + } + if (sitem->type & stack_style) + { + style = word_Normal; + spcstyle = word_WhiteSpace; + } + if (sitem->type & stack_idx ) { + indexword->text = ustrdup(indexstr.text); + if (index_downcase) + ustrlow(indexword->text); + indexing = FALSE; + rdadd(&indexstr, L'\0'); + index_merge(idx, FALSE, indexstr.text, idxwordlist); + sfree(indexstr.text); + } + if (sitem->type & stack_hyper) + { + wd.text = 
NULL; + wd.type = word_HyperEnd; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + } + if (sitem->type & stack_quote) + { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Close; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + } + } + sfree(sitem); + break; + case tok_cmd: + switch (t.cmd) + { + case c__comment: + /* + * In-paragraph comment: \#{ balanced braces } + * + * Anything goes here; even tok_eop. We should + * eat whitespace after the close brace _if_ + * there was whitespace before the \#. + */ + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + int braces = 1; + while (braces > 0) + { + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) + braces++; + else if (t.type == tok_rbrace) + braces--; + else if (t.type == tok_eof) + { + error(err_commenteof, &t.pos); + break; + } + } + } + if (seenwhite) + { + already = TRUE; + dtor(t), t = get_token(in); + if (t.type == tok_white) + { + iswhite = TRUE; + already = FALSE; + } + } + break; + case c_q: + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Open; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + sitem = mknew(struct stack_item); + sitem->type = stack_quote; + stk_push(parsestk, sitem); + } + break; + case c_K: + case c_k: + case c_R: + case c_W: + case c_L: + case c_date: + /* + * Keyword, hyperlink, or \date. We expect a + * left brace, some text, and then a right + * brace. 
No nesting; no arguments. + */ + wd.fpos = t.pos; + wd.breaks = FALSE; + if (t.cmd == c_K) + wd.type = word_UpperXref; + else if (t.cmd == c_k) + wd.type = word_LowerXref; + else if (t.cmd == c_R) + wd.type = word_FreeTextXref; + else if (t.cmd == c_W) + wd.type = word_HyperLink; + else if (t.cmd == c_L) + wd.type = word_LocalHyperLink; + else + wd.type = word_Normal; + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + if (wd.type == word_Normal) + { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + already = TRUE; + wdtext = ustrftime(NULL, broken); + wd.type = style; + } else + { + error(err_explbr, &t.pos); + wdtext = NULL; + } + } else + { + rdstring rs = { 0, 0, NULL }; + while (dtor(t), t = get_token(in), + t.type == tok_word || t.type == tok_white) + { + if (t.type == tok_white) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (wd.type == word_Normal) + { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + wdtext = ustrftime(rs.text, broken); + wd.type = style; + } else + { + wdtext = ustrdup(rs.text); + } + sfree(rs.text); + if (t.type != tok_rbrace) + { + error(err_kwexprbr, &t.pos); + } + } + wd.alt = NULL; + wd.aux = 0; + if (!indexing || index_visible) + { + wd.text = ustrdup(wdtext); + addword(wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup(wdtext); + addword(wd, &idximplicit); + } + sfree(wdtext); + if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink) + { + /* + * Hyperlinks are different: they then + * expect another left brace, to begin + * delimiting the text marked by the link. + */ + dtor(t), t = get_token(in); + /* + * Special cases: \W{}\c, \W{}\e, \W{}\cw + */ + sitem = mknew(struct stack_item); + sitem->type = stack_hyper; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? 
word_Code : + t.cmd == c_cw ? word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + sfree(sitem); + } else + { + stk_push(parsestk, sitem); + } + } + break; + case c_c: + case c_cw: + case c_e: + type = t.cmd; + if (style != word_Normal) + { + error(err_nestedstyles, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + style = (type == c_c ? word_Code : + type == c_cw ? word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem = mknew(struct stack_item); + sitem->type = stack_style; + stk_push(parsestk, sitem); + } + break; + case c_i: + case c_ii: + case c_I: + type = t.cmd; + if (indexing) + { + error(err_nestedindex, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + sitem = mknew(struct stack_item); + sitem->type = stack_idx; + dtor(t), t = get_token(in); + /* + * Special cases: \i\c, \i\e, \i\cw + */ + wd.fpos = t.pos; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) + { + sfree(sitem); + error(err_explbr, &t.pos); + } else + { + /* Add an index-reference word with no text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + wd.aux = 0; + wd.breaks = FALSE; + indexword = addword(wd, &whptr); + /* Set up a rdstring to read the index text */ + indexstr = nullrs; + /* Flags so that we do the Right Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; + /* Stack item to close the indexing on exit */ + stk_push(parsestk, sitem); + } + break; + case c_u: + uchr = t.aux; + utext[0] = uchr; + utext[1] = 0; + wd.type = style; + wd.breaks = FALSE; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + if (!indexing || index_visible) + { + wd.text = ustrdup(utext); + uword = addword(wd, &whptr); + } else + uword = NULL; + if (indexing) + { + wd.text = ustrdup(utext); + iword = addword(wd, &idximplicit); + } else + iword = NULL; + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) + { + /* + * \u with a left brace. Until the brace + * closes, all further words go on a + * sidetrack from the main thread of the + * paragraph. + */ + sitem = mknew(struct stack_item); + sitem->type = stack_ualt; + sitem->whptr = whptr; + sitem->idximplicit = idximplicit; + stk_push(parsestk, sitem); + whptr = uword ? &uword->alt : NULL; + idximplicit = iword ? 
&iword->alt : NULL; + } else + { + if (indexing) + rdadd(&indexstr, uchr); + already = TRUE; + } + break; + default: + if (!macrolookup(macros, in, t.text, &t.pos)) + error(err_badmidcmd, t.text, &t.pos); + break; + } + } + if (!already) + dtor(t), t = get_token(in); + seenwhite = iswhite; + } + /* Check the stack is empty */ + if (NULL != (sitem = stk_pop(parsestk))) + { + do + { + sfree(sitem); + sitem = stk_pop(parsestk); + } + while (sitem); + error(err_missingrbrace, &t.pos); + } + stk_free(parsestk); + addpara(par, ret); + } + + /* + * We break to here rather than returning, because otherwise + * this cleanup doesn't happen. + */ + dtor(t); + macrocleanup(macros); +} + +paragraph *read_input(input * in, indexdata * idx) +{ + paragraph *head = NULL; + paragraph **hptr = &head; + + while (in->currindex < in->nfiles) + { + in->currfp = fopen(in->filenames[in->currindex], "r"); + if (in->currfp) + { + setpos(in, in->filenames[in->currindex]); + read_file(&hptr, in, idx); + } + in->currindex++; + } + + return head; +} |