diff options
Diffstat (limited to 'Docs/src/bin/halibut')
-rwxr-xr-x | Docs/src/bin/halibut/LICENSE | 46 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/SConscript | 52 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/biblio.c | 244 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/bk_xhtml.c | 3924 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/contents.c | 492 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/error.c | 450 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/halibut.h | 816 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/help.c | 46 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/index.c | 526 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/input.c | 2976 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/keywords.c | 342 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/licence.c | 34 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/main.c | 642 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/malloc.c | 326 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/misc.c | 714 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/style.c | 14 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/tree234.c | 4920 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/tree234.h | 404 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/ustring.c | 402 | ||||
-rwxr-xr-x | Docs/src/bin/halibut/version.c | 26 |
20 files changed, 8698 insertions, 8698 deletions
diff --git a/Docs/src/bin/halibut/LICENSE b/Docs/src/bin/halibut/LICENSE index 7a6b724..bb28bf3 100755 --- a/Docs/src/bin/halibut/LICENSE +++ b/Docs/src/bin/halibut/LICENSE @@ -1,23 +1,23 @@ -Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett.
-
-Note: This version is modified by Robert Rainwater and Amir Szekely
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation files
-(the "Software"), to deal in the Software without restriction,
-including without limitation the rights to use, copy, modify, merge,
-publish, distribute, sublicense, and/or sell copies of the Software,
-and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+Halibut is copyright (c) 1999-2001 Simon Tatham and James Aylett. + +Note: This version is modified by Robert Rainwater and Amir Szekely + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation files +(the "Software"), to deal in the Software without restriction, +including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, +and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Docs/src/bin/halibut/SConscript b/Docs/src/bin/halibut/SConscript index 24ecfcb..5c606ca 100755 --- a/Docs/src/bin/halibut/SConscript +++ b/Docs/src/bin/halibut/SConscript @@ -1,26 +1,26 @@ -target = 'halibut'
-
-files = Split("""
- biblio.c
- bk_xhtml.c
- contents.c
- error.c
- help.c
- index.c
- input.c
- keywords.c
- licence.c
- main.c
- malloc.c
- misc.c
- style.c
- tree234.c
- ustring.c
- version.c
-""")
-
-Import('env')
-
-halibut = env.Program(target, files)
-
-Return('halibut')
+target = 'halibut' + +files = Split(""" + biblio.c + bk_xhtml.c + contents.c + error.c + help.c + index.c + input.c + keywords.c + licence.c + main.c + malloc.c + misc.c + style.c + tree234.c + ustring.c + version.c +""") + +Import('env') + +halibut = env.Program(target, files) + +Return('halibut') diff --git a/Docs/src/bin/halibut/biblio.c b/Docs/src/bin/halibut/biblio.c index 0b67105..ff0910c 100755 --- a/Docs/src/bin/halibut/biblio.c +++ b/Docs/src/bin/halibut/biblio.c @@ -1,122 +1,122 @@ -/*
- * biblio.c: process the bibliography
- */
-
-#include <assert.h>
-#include "halibut.h"
-
-static wchar_t *gentext(int num)
-{
- wchar_t text[22];
- wchar_t *p = text + sizeof(text);
- *--p = L'\0';
- *--p = L']';
- while (num != 0)
- {
- assert(p > text);
- *--p = L"0123456789"[num % 10];
- num /= 10;
- }
- assert(p > text);
- *--p = L'[';
- return ustrdup(p);
-}
-
-static void cite_biblio(keywordlist * kl, wchar_t * key, filepos fpos)
-{
- keyword *kw = kw_lookup(kl, key);
- if (!kw)
- error(err_nosuchkw, &fpos, key);
- else
- {
- /*
- * We've found a \k reference. If it's a
- * bibliography entry ...
- */
- if (kw->para->type == para_Biblio)
- {
- /*
- * ... then mark the paragraph as cited.
- */
- kw->para->type = para_BiblioCited;
- }
- }
-}
-
-/*
- * Make a pass through the source form, generating citation formats
- * for bibliography entries and also marking which bibliography
- * entries are actually cited (or \nocite-ed).
- */
-
-void gen_citations(paragraph * source, keywordlist * kl)
-{
- paragraph *para;
- int bibnum = 0;
-
- for (para = source; para; para = para->next)
- {
- word *ptr;
-
- /*
- * \BR and \nocite paragraphs get special processing here.
- */
- if (para->type == para_BR)
- {
- keyword *kw = kw_lookup(kl, para->keyword);
- if (!kw)
- {
- error(err_nosuchkw, &para->fpos, para->keyword);
- } else if (kw->text)
- {
- error(err_multiBR, &para->fpos, para->keyword);
- } else
- {
- kw->text = dup_word_list(para->words);
- }
- } else if (para->type == para_NoCite)
- {
- wchar_t *wp = para->keyword;
- while (*wp)
- {
- cite_biblio(kl, wp, para->fpos);
- wp = uadv(wp);
- }
- }
-
- /*
- * Scan for keyword references.
- */
- for (ptr = para->words; ptr; ptr = ptr->next)
- {
- if (ptr->type == word_UpperXref || ptr->type == word_LowerXref
- || ptr->type == word_FreeTextXref)
- cite_biblio(kl, ptr->text, ptr->fpos);
- }
- }
-
- /*
- * We're now almost done; all that remains is to scan through
- * the cited bibliography entries and invent default citation
- * texts for the ones that don't already have explicitly
- * provided \BR text.
- */
- for (para = source; para; para = para->next)
- {
- if (para->type == para_BiblioCited)
- {
- keyword *kw = kw_lookup(kl, para->keyword);
- assert(kw != NULL);
- if (!kw->text)
- {
- word *wd = smalloc(sizeof(word));
- wd->text = gentext(++bibnum);
- wd->type = word_Normal;
- wd->alt = NULL;
- wd->next = NULL;
- kw->text = wd;
- }
- para->kwtext = kw->text;
- }
- }
-}
+/* + * biblio.c: process the bibliography + */ + +#include <assert.h> +#include "halibut.h" + +static wchar_t *gentext(int num) +{ + wchar_t text[22]; + wchar_t *p = text + sizeof(text); + *--p = L'\0'; + *--p = L']'; + while (num != 0) + { + assert(p > text); + *--p = L"0123456789"[num % 10]; + num /= 10; + } + assert(p > text); + *--p = L'['; + return ustrdup(p); +} + +static void cite_biblio(keywordlist * kl, wchar_t * key, filepos fpos) +{ + keyword *kw = kw_lookup(kl, key); + if (!kw) + error(err_nosuchkw, &fpos, key); + else + { + /* + * We've found a \k reference. If it's a + * bibliography entry ... + */ + if (kw->para->type == para_Biblio) + { + /* + * ... then mark the paragraph as cited. + */ + kw->para->type = para_BiblioCited; + } + } +} + +/* + * Make a pass through the source form, generating citation formats + * for bibliography entries and also marking which bibliography + * entries are actually cited (or \nocite-ed). + */ + +void gen_citations(paragraph * source, keywordlist * kl) +{ + paragraph *para; + int bibnum = 0; + + for (para = source; para; para = para->next) + { + word *ptr; + + /* + * \BR and \nocite paragraphs get special processing here. + */ + if (para->type == para_BR) + { + keyword *kw = kw_lookup(kl, para->keyword); + if (!kw) + { + error(err_nosuchkw, &para->fpos, para->keyword); + } else if (kw->text) + { + error(err_multiBR, &para->fpos, para->keyword); + } else + { + kw->text = dup_word_list(para->words); + } + } else if (para->type == para_NoCite) + { + wchar_t *wp = para->keyword; + while (*wp) + { + cite_biblio(kl, wp, para->fpos); + wp = uadv(wp); + } + } + + /* + * Scan for keyword references. 
+ */ + for (ptr = para->words; ptr; ptr = ptr->next) + { + if (ptr->type == word_UpperXref || ptr->type == word_LowerXref + || ptr->type == word_FreeTextXref) + cite_biblio(kl, ptr->text, ptr->fpos); + } + } + + /* + * We're now almost done; all that remains is to scan through + * the cited bibliography entries and invent default citation + * texts for the ones that don't already have explicitly + * provided \BR text. + */ + for (para = source; para; para = para->next) + { + if (para->type == para_BiblioCited) + { + keyword *kw = kw_lookup(kl, para->keyword); + assert(kw != NULL); + if (!kw->text) + { + word *wd = smalloc(sizeof(word)); + wd->text = gentext(++bibnum); + wd->type = word_Normal; + wd->alt = NULL; + wd->next = NULL; + kw->text = wd; + } + para->kwtext = kw->text; + } + } +} diff --git a/Docs/src/bin/halibut/bk_xhtml.c b/Docs/src/bin/halibut/bk_xhtml.c index 5b1bb7b..431c04b 100755 --- a/Docs/src/bin/halibut/bk_xhtml.c +++ b/Docs/src/bin/halibut/bk_xhtml.c @@ -1,1962 +1,1962 @@ -/*
- * xhtml backend for Halibut
- * (initial implementation by James Aylett)
- *
- * Still to do:
- *
- * +++ doesn't handle non-breaking hyphens. Not sure how to yet.
- * +++ entity names (from a file -- ideally supply normal SGML files)
- * +++ configuration directive to file split where the current layout
- * code wouldn't. Needs changes to _ponder_layout() and _do_paras(),
- * perhaps others.
- *
- * Limitations:
- *
- * +++ biblio/index references target the nearest section marker, rather
- * than having a dedicated target themselves. In large bibliographies
- * this will cause problems. (The solution is to fake up a response
- * from xhtml_find_section(), probably linking it into the sections
- * chain just in case we need it again, and to make freeing it up
- * easier.) docsrc.pl used to work as we do, however, and SGT agrees that
- * this is acceptable for now.
- * +++ can't cope with leaf-level == 0. It's all to do with the
- * top-level file not being normal, probably not even having a valid
- * section level, and stuff like that. I question whether this is an
- * issue, frankly; small manuals that fit on one page should probably
- * not be written in halibut at all.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include "halibut.h"
-
-struct xhtmlsection_Struct {
- struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */
- struct xhtmlsection_Struct *child; /* NULL if split across files */
- struct xhtmlsection_Struct *parent; /* NULL if split across files */
- struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */
- paragraph *para;
- struct xhtmlfile_Struct *file; /* which file is this a part of? */
- char *fragment; /* fragment id within the file */
- int level;
-};
-
-struct xhtmlfile_Struct {
- struct xhtmlfile_Struct *next;
- struct xhtmlfile_Struct *child;
- struct xhtmlfile_Struct *parent;
- char *filename;
- struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */
- int is_leaf; /* is this file a leaf file, ie does it not have any children? */
-};
-
-typedef struct xhtmlsection_Struct xhtmlsection;
-typedef struct xhtmlfile_Struct xhtmlfile;
-typedef struct xhtmlindex_Struct xhtmlindex;
-
-struct xhtmlindex_Struct {
- int nsection;
- int size;
- xhtmlsection **sections;
-};
-
-typedef struct {
- int just_numbers;
- wchar_t *number_suffix;
-} xhtmlheadfmt;
-
-typedef struct {
- int contents_depth[6];
- int leaf_contains_contents;
- int leaf_level;
- int leaf_smallest_contents;
- int include_version_id;
- wchar_t *author, *description;
- wchar_t *head_end, *body, *body_start, *body_end, *address_start,
- *address_end, *nav_attrs;
- wchar_t *rlink_prefix, *rlink_suffix;
- wchar_t *chm_toc_file, *chm_ind_file;
- int suppress_address;
- xhtmlheadfmt fchapter, *fsect;
- int nfsect;
-} xhtmlconfig;
-
-/*static void xhtml_level(paragraph *, int);
-static void xhtml_level_0(paragraph *);
-static void xhtml_docontents(FILE *, paragraph *, int);
-static void xhtml_dosections(FILE *, paragraph *, int);
-static void xhtml_dobody(FILE *, paragraph *, int);*/
-
-static void xhtml_doheader(FILE *, word *);
-static void xhtml_dofooter(FILE *);
-static void xhtml_versionid(FILE *, word *, int);
-
-static void xhtml_utostr(wchar_t *, char **);
-static int xhtml_para_level(paragraph *);
-static int xhtml_reservedchar(int);
-
-static int xhtml_convert(wchar_t *, char **, int);
-static void xhtml_rdaddwc(rdstringc *, word *, word *);
-static void xhtml_para(FILE *, word *);
-static void xhtml_codepara(FILE *, word *);
-static void xhtml_heading(FILE *, paragraph *);
-
-static void chm_doheader(FILE *, word *);
-static void chm_dofooter(FILE *);
-/* File-global variables are much easier than passing these things
- * all over the place. Evil, but easier. We can replace this with a single
- * structure at some point.
- */
-static xhtmlconfig conf;
-static keywordlist *keywords;
-static indexdata *idx;
-static xhtmlfile *topfile;
-static xhtmlsection *topsection;
-static paragraph *sourceparas;
-static xhtmlfile *lastfile;
-static xhtmlfile *xhtml_last_file = NULL;
-static int last_level = -1;
-static xhtmlsection *currentsection;
-static FILE* chm_toc = NULL;
-static FILE* chm_ind = NULL;
-
-
-static xhtmlconfig xhtml_configure(paragraph * source)
-{
- xhtmlconfig ret;
-
- /*
- * Defaults.
- */
- ret.contents_depth[0] = 2;
- ret.contents_depth[1] = 3;
- ret.contents_depth[2] = 4;
- ret.contents_depth[3] = 5;
- ret.contents_depth[4] = 6;
- ret.contents_depth[5] = 7;
- ret.leaf_level = 2;
- ret.leaf_smallest_contents = 4;
- ret.leaf_contains_contents = FALSE;
- ret.include_version_id = TRUE;
- ret.author = NULL;
- ret.description = NULL;
- ret.head_end = NULL;
- ret.body = NULL;
- ret.body_start = NULL;
- ret.body_end = NULL;
- ret.address_start = NULL;
- ret.address_end = NULL;
- ret.nav_attrs = NULL;
- ret.suppress_address = FALSE;
- ret.chm_toc_file = NULL;
- ret.chm_ind_file = NULL;
- chm_toc = NULL;
- chm_ind = NULL;
- ret.fchapter.just_numbers = FALSE;
- ret.fchapter.number_suffix = ustrdup(L": ");
- ret.nfsect = 2;
- ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect);
- ret.fsect[0].just_numbers = FALSE;
- ret.fsect[0].number_suffix = ustrdup(L": ");
- ret.fsect[1].just_numbers = TRUE;
- ret.fsect[1].number_suffix = ustrdup(L" ");
- ret.rlink_prefix = NULL;
- ret.rlink_suffix = NULL;
-
- for (; source; source = source->next)
- {
- if (source->type == para_Config)
- {
- if (!ustricmp(source->keyword, L"xhtml-contents-depth-0"))
- {
- ret.contents_depth[0] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1"))
- {
- ret.contents_depth[1] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2"))
- {
- ret.contents_depth[2] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3"))
- {
- ret.contents_depth[3] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4"))
- {
- ret.contents_depth[4] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5"))
- {
- ret.contents_depth[5] = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-leaf-level"))
- {
- ret.leaf_level = utoi(uadv(source->keyword));
- } else
- if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents"))
- {
- ret.leaf_smallest_contents = utoi(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-versionid"))
- {
- ret.include_version_id = utob(uadv(source->keyword));
- } else
- if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents"))
- {
- ret.leaf_contains_contents = utob(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-suppress-address"))
- {
- ret.suppress_address = utob(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-author"))
- {
- ret.author = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"chm-toc-file"))
- {
- ret.chm_toc_file = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"chm-ind-file"))
- {
- ret.chm_ind_file = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-description"))
- {
- ret.description = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-head-end"))
- {
- ret.head_end = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-body-start"))
- {
- ret.body_start = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-body-tag"))
- {
- ret.body = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-body-end"))
- {
- ret.body_end = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-address-start"))
- {
- ret.address_start = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-address-end"))
- {
- ret.address_end = uadv(source->keyword);
- } else
- if (!ustricmp(source->keyword, L"xhtml-navigation-attributes"))
- {
- ret.nav_attrs = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric"))
- {
- ret.fchapter.just_numbers = utob(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix"))
- {
- ret.fchapter.number_suffix = ustrdup(uadv(source->keyword));
- } else if (!ustricmp(source->keyword, L"xhtml-rlink-prefix"))
- {
- ret.rlink_prefix = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-rlink-suffix"))
- {
- ret.rlink_suffix = uadv(source->keyword);
- } else if (!ustricmp(source->keyword, L"xhtml-section-numeric"))
- {
- wchar_t *p = uadv(source->keyword);
- int n = 0;
- if (uisdigit(*p))
- {
- n = utoi(p);
- p = uadv(p);
- }
- if (n >= ret.nfsect)
- {
- int i;
- ret.fsect = resize(ret.fsect, n + 1);
- for (i = ret.nfsect; i <= n; i++)
- ret.fsect[i] = ret.fsect[ret.nfsect - 1];
- ret.nfsect = n + 1;
- }
- ret.fsect[n].just_numbers = utob(p);
- } else if (!ustricmp(source->keyword, L"xhtml-section-suffix"))
- {
- wchar_t *p = uadv(source->keyword);
- int n = 0;
- if (uisdigit(*p))
- {
- n = utoi(p);
- p = uadv(p);
- }
- if (n >= ret.nfsect)
- {
- int i;
- ret.fsect = resize(ret.fsect, n + 1);
- for (i = ret.nfsect; i <= n; i++)
- ret.fsect[i] = ret.fsect[ret.nfsect - 1];
- ret.nfsect = n + 1;
- }
- ret.fsect[n].number_suffix = ustrdup(p);
- }
- }
- }
-
- /* printf(" !!! leaf_level = %i\n", ret.leaf_level);
- printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]);
- printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]);
- printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]);
- printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]);
- printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]);
- printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]);
- printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents); */
- return ret;
-}
-
-static xhtmlsection *xhtml_new_section(xhtmlsection * last)
-{
- xhtmlsection *ret = mknew(xhtmlsection);
- ret->next = NULL;
- ret->child = NULL;
- ret->parent = NULL;
- ret->chain = last;
- ret->para = NULL;
- ret->file = NULL;
- ret->fragment = NULL;
- ret->level = -1; /* marker: end of chain */
- return ret;
-}
-
-/* Returns NULL or the section that marks that paragraph */
-static xhtmlsection *xhtml_find_section(paragraph * p)
-{
- xhtmlsection *ret = topsection;
- if (xhtml_para_level(p) == -1)
- { /* first, we back-track to a section paragraph */
- paragraph *p2 = sourceparas;
- paragraph *p3 = NULL;
- while (p2 && p2 != p)
- {
- if (xhtml_para_level(p2) != -1)
- {
- p3 = p2;
- }
- p2 = p2->next;
- }
- if (p3 == NULL)
- { /* for some reason, we couldn't find a section before this paragraph ... ? */
- /* Note that this can happen, if you have a cross-reference to before the first chapter starts.
- * So don't do that, then.
- */
- return NULL;
- }
- p = p3;
- }
- while (ret && ret->para != p)
- {
-/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/
- ret = ret->chain;
- }
- return ret;
-}
-
-static xhtmlfile *xhtml_new_file(xhtmlsection * sect)
-{
- xhtmlfile *ret = mknew(xhtmlfile);
-
- ret->next = NULL;
- ret->child = NULL;
- ret->parent = NULL;
- ret->filename = NULL;
- ret->sections = sect;
- ret->is_leaf = (sect != NULL && sect->level == conf.leaf_level);
- if (sect == NULL)
- {
- if (conf.leaf_level == 0)
- { /* currently unused */
-#define FILENAME_MANUAL "Manual.html"
-#define FILENAME_CONTENTS "Contents.html"
- ret->filename = smalloc(strlen(FILENAME_MANUAL) + 1);
- sprintf(ret->filename, FILENAME_MANUAL);
- } else
- {
- ret->filename = smalloc(strlen(FILENAME_CONTENTS) + 1);
- sprintf(ret->filename, FILENAME_CONTENTS);
- }
- } else
- {
- paragraph *p = sect->para;
- rdstringc fname_c = { 0, 0, NULL };
- char *c;
- word *w;
- for (w = (p->kwtext) ? (p->kwtext) : (p->words); w; w = w->next)
- {
- switch (removeattr(w->type))
- {
- case word_Normal:
- /*case word_Emph:
- case word_Code:
- case word_WeakCode: */
- xhtml_utostr(w->text, &c);
- rdaddsc(&fname_c, c);
- sfree(c);
- break;
- }
- }
- rdaddsc(&fname_c, ".html");
- ret->filename = rdtrimc(&fname_c);
- }
- /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false")); */
- return ret;
-}
-
-/*
- * Walk the tree fixing up files which are actually leaf (ie
- * have no children) but aren't at leaf level, so they have the
- * leaf flag set.
- */
-void xhtml_fixup_layout(xhtmlfile * file)
-{
- if (file->child == NULL)
- {
- file->is_leaf = TRUE;
- } else
- {
- xhtml_fixup_layout(file->child);
- }
- if (file->next)
- xhtml_fixup_layout(file->next);
-}
-
-/*
- * Create the tree structure so we know where everything goes.
- * Method:
- *
- * Ignoring file splitting, we have three choices with each new section:
- *
- * +-----------------+-----------------+
- * | | |
- * X +----X----+ (1)
- * | |
- * Y (2)
- * |
- * (3)
- *
- * Y is the last section we added (currentsect).
- * If sect is the section we want to add, then:
- *
- * (1) if sect->level < currentsect->level
- * (2) if sect->level == currentsect->level
- * (3) if sect->level > currentsect->level
- *
- * This requires the constraint that you never skip section numbers
- * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
- *
- * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
- * more than one level at a time. Lots of asserts, and probably part of
- * the algorithm here, rely on this being true. (It currently isn't
- * enforced by halibut, however.)
- *
- * File splitting makes this harder. For instance, say we added at (3)
- * above and now need to add another section. We are splitting at level
- * 2, ie the level of Y. Z is the last section we added:
- *
- * +-----------------+-----------------+
- * | | |
- * X +----X----+ (1)
- * | |
- * +----Y----+ (1)
- * | |
- * Z (2)
- * |
- * (3)
- *
- * The (1) case is now split; we need to search upwards to find where
- * to actually link in. The other two cases remain the same (and will
- * always be like this).
- *
- * File splitting makes this harder, however. The decision of whether
- * to split to a new file is always on the same condition, however (is
- * the level of this section higher than the leaf_level configuration
- * value or not).
- *
- * Treating the cases backwards:
- *
- * (3) same file if sect->level > conf.leaf_level, otherwise new file
- *
- * if in the same file, currentsect->child points to sect
- * otherwise the linking is done through the file tree (which works
- * in more or less the same way, ie currentfile->child points to
- * the new file)
- *
- * (2) same file if sect->level > conf.leaf_level, otherwise new file
- *
- * if in the same file, currentsect->next points to sect
- * otherwise file linking and currentfile->next points to the new
- * file (we know that Z must have caused a new file to be created)
- *
- * (1) same file if sect->level > conf.leaf_level, otherwise new file
- *
- * this is actually effectively the same case as (2) here,
- * except that we first have to travel up the sections to figure
- * out which section this new one will be a sibling of. In doing
- * so, we may disappear off the top of a file and have to go up
- * to its parent in the file tree.
- *
- */
-static void xhtml_ponder_layout(paragraph * p)
-{
- xhtmlsection *lastsection;
- xhtmlsection *currentsect;
- xhtmlfile *currentfile;
-
- lastfile = NULL;
- topsection = xhtml_new_section(NULL);
- topfile = xhtml_new_file(NULL);
- lastsection = topsection;
- currentfile = topfile;
- currentsect = topsection;
-
- if (conf.leaf_level == 0)
- {
- topfile->is_leaf = 1;
- topfile->sections = topsection;
- topsection->file = topfile;
- }
-
- for (; p; p = p->next)
- {
- int level = xhtml_para_level(p);
- if (level > 0)
- { /* actually a section */
- xhtmlsection *sect;
- word *w;
- char *c;
- rdstringc fname_c = { 0, 0, NULL };
-
- sect = xhtml_new_section(lastsection);
- lastsection = sect;
- sect->para = p;
- for (w = (p->kwtext2) ? (p->kwtext2) : (p->words); w; w = w->next)
- { /* kwtext2 because we want numbers only! */
- switch (removeattr(w->type))
- {
- case word_Normal:
- /*case word_Emph:
- case word_Code:
- case word_WeakCode: */
- xhtml_utostr(w->text, &c);
- rdaddsc(&fname_c, c);
- sfree(c);
- break;
- }
- }
-/* rdaddsc(&fname_c, ".html");*/
- sect->fragment = rdtrimc(&fname_c);
- sect->level = level;
- /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level); */
-
- if (level > currentsect->level)
- { /* case (3) */
- if (level > conf.leaf_level)
- { /* same file */
- assert(currentfile->is_leaf);
- currentsect->child = sect;
- sect->parent = currentsect;
- sect->file = currentfile;
- /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename); */
- currentsect = sect;
- } else
- { /* new file */
- xhtmlfile *file = xhtml_new_file(sect);
- assert(!currentfile->is_leaf);
- currentfile->child = file;
- sect->file = file;
- file->parent = currentfile;
- /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename); */
- currentfile = file;
- currentsect = sect;
- }
- } else if (level >= currentsect->file->sections->level)
- {
- /* Case (1) or (2) *AND* still under the section that starts
- * the current file.
- *
- * I'm not convinced that this couldn't be rolled in with the
- * final else {} leg further down. It seems a lot of effort
- * this way.
- */
- if (level > conf.leaf_level)
- { /* stick within the same file */
- assert(currentfile->is_leaf);
- sect->file = currentfile;
- while (currentsect && currentsect->level > level &&
- currentsect->file == currentsect->parent->file)
- {
- currentsect = currentsect->parent;
- }
- assert(currentsect);
- currentsect->next = sect;
- assert(currentsect->level == sect->level);
- sect->parent = currentsect->parent;
- currentsect = sect;
- /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename); */
- } else
- { /* new file */
- xhtmlfile *file = xhtml_new_file(sect);
- sect->file = file;
- currentfile->next = file;
- file->parent = currentfile->parent;
- file->is_leaf = (level == conf.leaf_level);
- file->sections = sect;
- /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename); */
- currentfile = file;
- currentsect = sect;
- }
- } else
- { /* Case (1) or (2) and we must move up the file tree first */
- /* this loop is now probably irrelevant - we know we can't connect
- * to anything in the current file */
- while (currentsect && level < currentsect->level)
- {
- currentsect = currentsect->parent;
- if (currentsect)
- {
- /* printf(" * up one level to '%s'\n", currentsect->fragment); */
- } else
- {
- /* printf(" * up one level (off top of current file)\n"); */
- }
- }
- if (currentsect)
- {
- /* I'm pretty sure this can now never fire */
- assert(currentfile->is_leaf);
- /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename); */
- sect->file = currentfile;
- currentsect->next = sect;
- currentsect = sect;
- } else
- { /* find a file we can attach to */
- while (currentfile && currentfile->sections
- && level < currentfile->sections->level)
- {
- currentfile = currentfile->parent;
- if (currentfile)
- {
- /* printf(" * up one file level to '%s'\n", currentfile->filename); */
- } else
- {
- /* printf(" * up one file level (off top of tree)\n"); */
- }
- }
- if (currentfile)
- { /* new file (we had to skip up a file to
- get here, so we must be dealing with a
- level no lower than the configured
- leaf_level */
- xhtmlfile *file = xhtml_new_file(sect);
- currentfile->next = file;
- sect->file = file;
- file->parent = currentfile->parent;
- file->is_leaf = (level == conf.leaf_level);
- file->sections = sect;
- /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename); */
- currentfile = file;
- currentsect = sect;
- } else
- {
- fatal(err_whatever,
- "Ran off the top trying to connect sibling: strange document.");
- }
- }
- }
- }
- }
- topsection = lastsection; /* get correct end of the chain */
- xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */
-}
-
-static void xhtml_do_index();
-static void xhtml_do_file(xhtmlfile * file);
-static void xhtml_do_top_file(xhtmlfile * file, paragraph * sourceform);
-static void xhtml_do_paras(FILE * fp, paragraph * p);
-static int xhtml_do_contents_limit(FILE * fp, xhtmlfile * file, int limit);
-static int xhtml_do_contents_section_limit(FILE * fp, xhtmlsection * section, int limit);
-static int xhtml_add_contents_entry(FILE * fp, xhtmlsection * section, int limit);
-static int xhtml_do_contents(FILE * fp, xhtmlfile * file);
-static int xhtml_do_naked_contents(FILE * fp, xhtmlfile * file);
-static void xhtml_do_sections(FILE * fp, xhtmlsection * sections);
-
-/*
- * Do all the files in this structure.
- */
-static void xhtml_do_files(xhtmlfile * file)
-{
- xhtml_do_file(file);
- if (file->child)
- xhtml_do_files(file->child);
- if (file->next)
- xhtml_do_files(file->next);
-}
-
-/*
- * Free up all memory used by the file tree from 'xfile' downwards
- */
-static void xhtml_free_file(xhtmlfile * xfile)
-{
- if (xfile == NULL)
- {
- return;
- }
-
- if (xfile->filename)
- {
- sfree(xfile->filename);
- }
- xhtml_free_file(xfile->child);
- xhtml_free_file(xfile->next);
- sfree(xfile);
-}
-
-/*
- * Entry point of the XHTML backend.
- *
- * Stores the inputs in the file-level globals, works out the file and
- * section layout, writes the top-level file, then (when the top file
- * has children) every other file plus the index page, and finally
- * releases the section chain, the file tree, the per-index-entry
- * backend data and the heading-format strings held in `conf'.
- */
-void
-xhtml_backend(paragraph * sourceform, keywordlist * in_keywords,
-              indexdata * in_idx)
-{
-  indexentry *entry;
-  xhtmlsection *sect, *following;
-  int n;
-
-  sourceparas = sourceform;
-  conf = xhtml_configure(sourceform);
-  keywords = in_keywords;
-  idx = in_idx;
-
-  /* No index entry carries backend data yet. */
-  n = 0;
-  while ((entry = (indexentry *) index234(idx->entries, n)) != NULL)
-  {
-    entry->backend_data = NULL;
-    n++;
-  }
-
-  xhtml_ponder_layout(sourceform);
-
-  /* Write the output files; the top file must be the only root. */
-  xhtml_do_top_file(topfile, sourceform);
-  assert(!topfile->next);
-  if (topfile->child)
-  {
-    xhtml_do_files(topfile->child);
-    xhtml_do_index();
-  }
-
-  /* Free every section by following the flat `chain' links. */
-  for (sect = topsection; sect; sect = following)
-  {
-    following = sect->chain;
-    if (sect->fragment)
-      sfree(sect->fragment);
-    sfree(sect);
-  }
-
-  xhtml_free_file(topfile);
-
-  /* Free the section lists hung off the index entries. */
-  for (n = 0;
-       (entry = (indexentry *) index234(idx->entries, n)) != NULL; n++)
-  {
-    xhtmlindex *xi = (xhtmlindex *) entry->backend_data;
-
-    if (xi != NULL)
-    {
-      if (xi->sections != NULL)
-        sfree(xi->sections);
-      sfree(xi);
-    }
-    entry->backend_data = NULL;
-  }
-
-  /* Free the heading-format data held in the configuration. */
-  sfree(conf.fchapter.number_suffix);
-  for (n = 0; n < conf.nfsect; n++)
-    sfree(conf.fsect[n].number_suffix);
-  sfree(conf.fsect);
-}
-
-/*
- * Map a paragraph to its heading level: 0 for the title, 1 for any
- * kind of chapter or appendix, aux + 2 for headings and subsections,
- * and -1 for every other paragraph type.
- */
-static int xhtml_para_level(paragraph * p)
-{
-  if (p->type == para_Title)
-    return 0;
-
-  if (p->type == para_UnnumberedChapter
-      || p->type == para_Chapter || p->type == para_Appendix)
-    return 1;
-
-  if (p->type == para_Heading || p->type == para_Subsect)
-    return p->aux + 2;
-
-  return -1;
-}
-
-static char *xhtml_index_filename = "IndexPage.html"; /* file the index page is written to by xhtml_do_index(); also the "Next" target xhtml_donavlinks() uses when no further file follows */
-
-/* Output the nav links (Previous | Contents | Next) for the current file.
- * file == NULL means we're doing the index.
- *
- * "Previous" links to xhtml_last_file (plain text when there is none).
- * "Next" is the file's first child if it has one; otherwise the next
- * sibling of the file itself or of its nearest ancestor that has one.
- * When no such file exists the link goes to the index page, and on the
- * index page itself "Next" is plain text.
- */
-static void xhtml_donavlinks(FILE * fp, xhtmlfile * file)
-{
-  xhtmlfile *xhtml_next_file = NULL;
-
-  fprintf(fp, "<p");
-  if (conf.nav_attrs != NULL)
-    fprintf(fp, " %ls>", conf.nav_attrs);
-  else
-    fprintf(fp, ">");
-
-  if (xhtml_last_file == NULL)
-    fprintf(fp, "Previous | ");
-  else
-    fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename);
-
-  fprintf(fp, "<a href='Contents.html'>Contents</a> | ");
-
-  if (file != NULL)
-  {                             /* otherwise we're doing nav links for the index */
-    xhtml_next_file = file->child;
-    if (xhtml_next_file == NULL)
-    {
-      /*
-       * Climb towards the root until some level has a following
-       * sibling. This never dereferences a NULL parent, and it finds
-       * the right successor for the last file of a deeply nested
-       * subtree instead of falling through to the index.
-       */
-      xhtmlfile *up;
-      for (up = file; up != NULL && xhtml_next_file == NULL;
-           up = up->parent)
-        xhtml_next_file = up->next;
-    }
-  }
-
-  if (xhtml_next_file != NULL)
-    fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename);
-  else if (file == NULL)
-    fprintf(fp, "Next ");       /* index, so no next file */
-  else
-    fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename);
-
-  fprintf(fp, "</p>\n");
-}
-
-/* Write the index as a definition list: one <dt> per index entry that
- * has backend data, followed by a <dd> of comma-separated links to the
- * sections recorded for it. Writes nothing when the index is empty.
- */
-static void xhtml_do_index_body(FILE * fp)
-{
-  indexentry *entry;
-  int pos;
-
-  if (count234(idx->entries) == 0)
-    return;
-
-  fprintf(fp, "<dl>\n");
-  for (pos = 0;
-       (entry = (indexentry *) index234(idx->entries, pos)) != NULL; pos++)
-  {
-    xhtmlindex *xi = (xhtmlindex *) entry->backend_data;
-    int k;
-
-    if (!xi)
-      continue;                 /* never referenced: no entry written */
-
-    fprintf(fp, "<dt>");
-    xhtml_para(fp, entry->text);
-    fprintf(fp, "</dt>\n<dd>");
-
-    for (k = 0; k < xi->nsection; k++)
-    {
-      xhtmlsection *sect = xi->sections[k];
-
-      if (!sect)
-        continue;
-
-      fprintf(fp, "<a href='%s#%s'>", sect->file->filename,
-              sect->fragment);
-      /* prefer the keyword text as the link label */
-      if (sect->para->kwtext)
-        xhtml_para(fp, sect->para->kwtext);
-      else if (sect->para->words)
-        xhtml_para(fp, sect->para->words);
-      fprintf(fp, "</a>");
-
-      if (k + 1 < xi->nsection)
-        fprintf(fp, ", ");
-    }
-    fprintf(fp, "</dd>\n");
-  }
-  fprintf(fp, "</dl>\n");
-}
-/* Write the standalone index page: header titled "Index", nav links,
- * the index body, nav links again, footer. Failure to open the file
- * is reported through fatal().
- */
-static void xhtml_do_index()
-{
-  word title = { NULL, NULL, word_Normal, 0, 0, L"Index", {NULL, 0, 0} };
-  FILE *out;
-
-  out = fopen(xhtml_index_filename, "w");
-  if (!out)
-    fatal(err_cantopenw, xhtml_index_filename);
-
-  xhtml_doheader(out, &title);
-  xhtml_donavlinks(out, NULL);
-  xhtml_do_index_body(out);
-  xhtml_donavlinks(out, NULL);
-  xhtml_dofooter(out);
-
-  fclose(out);
-}
-
-/* Write one non-top-level file: header, nav links, an optional contents
- * list, the file's sections, a contents list of child files for
- * non-leaf files, nav links again and the footer. Afterwards the file
- * is remembered in xhtml_last_file for the next "Previous" link.
- */
-static void xhtml_do_file(xhtmlfile * file)
-{
-  word *title;
-  FILE *out = fopen(file->filename, "w");
-
-  if (out == NULL)
-    fatal(err_cantopenw, file->filename);
-
-  /* Title is the first section's words, else its keyword text, else none. */
-  title = file->sections->para->words;
-  if (!title)
-    title = file->sections->para->kwtext;
-  xhtml_doheader(out, title);
-
-  xhtml_donavlinks(out, file);
-
-  if (file->is_leaf && conf.leaf_contains_contents)
-  {
-    /* dry run (NULL stream) just counts the entries */
-    if (xhtml_do_contents(NULL, file) >= conf.leaf_smallest_contents)
-      xhtml_do_contents(out, file);
-  }
-  xhtml_do_sections(out, file->sections);
-  if (!file->is_leaf)
-    xhtml_do_naked_contents(out, file);
-
-  xhtml_donavlinks(out, file);
-  xhtml_dofooter(out);
-  fclose(out);
-
-  xhtml_last_file = file;
-}
-
-/* Output the top-level file.
- *
- * Opens the file plus, when configured, the CHM contents and CHM index
- * files; writes the headers, the document title, every preamble
- * paragraph and then every copyright paragraph, the contents list and
- * the top-level sections. In single-file mode (leaf-level 0) with a
- * non-empty index the index body is appended too. All opened files are
- * closed before returning and chm_toc / chm_ind are reset to NULL.
- */
-static void xhtml_do_top_file(xhtmlfile * file, paragraph * sourceform)
-{
-  static const int front_matter[] = { para_Preamble, para_Copyright };
-  char fname[4096];
-  paragraph *title, *p;
-  unsigned k;
-  FILE *fp;
-
-  fp = fopen(file->filename, "w");
-  if (fp == NULL)
-    fatal(err_cantopenw, file->filename);
-
-  /* An empty converted name means the CHM file was not requested. */
-  ustrtoa(conf.chm_toc_file, fname, 4096);
-  chm_toc = NULL;
-  if (*fname)
-  {
-    chm_toc = fopen(fname, "w");
-    if (chm_toc == NULL)
-      fatal(err_cantopenw, fname);
-  }
-
-  ustrtoa(conf.chm_ind_file, fname, 4096);
-  chm_ind = NULL;
-  if (*fname)
-  {
-    chm_ind = fopen(fname, "w");
-    if (chm_ind == NULL)
-      fatal(err_cantopenw, fname);
-  }
-
-  /* Only the first title paragraph is used. */
-  title = sourceform;
-  while (title && title->type != para_Title)
-    title = title->next;
-
-  if (title)
-  {
-    xhtml_doheader(fp, title->words);
-    if (chm_toc)
-      chm_doheader(chm_toc, title->words);
-    if (chm_ind)
-      chm_doheader(chm_ind, title->words);
-    xhtml_heading(fp, title);
-  } else
-  {
-    xhtml_doheader(fp, NULL /* Eek! */ );
-  }
-
-  /* All preamble paragraphs first, then all copyright paragraphs. */
-  for (k = 0; k < sizeof front_matter / sizeof front_matter[0]; k++)
-  {
-    for (p = sourceform; p; p = p->next)
-    {
-      if (p->type != front_matter[k])
-        continue;
-      fprintf(fp, "<p>");
-      xhtml_para(fp, p->words);
-      fprintf(fp, "</p>\n");
-    }
-  }
-
-  xhtml_do_contents(fp, file);
-  xhtml_do_sections(fp, file->sections);
-
-  /* Single-file mode keeps the index in the top file. */
-  if (conf.leaf_level == 0 && count234(idx->entries) > 0)
-  {
-    fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n");
-    xhtml_do_index_body(fp);
-  }
-
-  xhtml_dofooter(fp);
-  if (chm_toc)
-    chm_dofooter(chm_toc);
-  if (chm_ind)
-    chm_dofooter(chm_ind);
-
-  fclose(fp);
-  if (chm_toc)
-  {
-    fclose(chm_toc);
-    chm_toc = NULL;
-  }
-  if (chm_ind)
-  {
-    fclose(chm_ind);
-    chm_ind = NULL;
-  }
-}
-
-/* Make a freshly allocated ASCII copy of the wide string `in' and store
- * it in `*out'. Characters outside 32..126 are replaced by '?'.
- */
-static void xhtml_utostr(wchar_t * in, char **out)
-{
-  int len = ustrlen(in);
-  int k = 0;
-
-  *out = smalloc(len + 1);
-  while (k < len)
-  {
-    wchar_t wc = in[k];
-
-    (*out)[k] = (wc < 32 || wc > 126) ? '?' : (char) wc;
-    k++;
-  }
-  (*out)[k] = 0;
-}
-
-/*
- * Write the contents list for `file' and the files below it. The depth
- * limit comes from conf.contents_depth, indexed by the level of the
- * file's first section (capped at 5). With fp == NULL nothing is
- * written and the entries are only counted. Returns the entry count.
- */
-static int xhtml_do_contents(FILE * fp, xhtmlfile * file)
-{
-  int level = 0, limit, start_level, count;
-
-  if (!file)
-    return 0;
-
-  if (file->sections)
-    level = file->sections->level;
-  limit = conf.contents_depth[(level > 5) ? (5) : (level)];
-
-  start_level = level;
-  if (file->is_leaf)
-    start_level = level - 1;
-  last_level = start_level;
-
-  count = xhtml_do_contents_section_limit(fp, file->sections, limit);
-  count += xhtml_do_contents_limit(fp, file->child, limit);
-
-  /* Close any lists still open from the entries just written. */
-  if (fp != NULL)
-  {
-    for (; last_level > start_level; last_level--)
-    {
-      fprintf(fp, "</ul>\n");
-      if (chm_toc)
-        fprintf(chm_toc, "</ul>\n");
-    }
-  }
-  return count;
-}
-
-/* Like xhtml_do_contents(), but lists only the child files: no entries
- * are written for the sections of `file' itself. Returns the number of
- * entries.
- */
-static int xhtml_do_naked_contents(FILE * fp, xhtmlfile * file)
-{
-  int level = 0, limit, start_level, count;
-
-  if (!file)
-    return 0;
-
-  if (file->sections)
-    level = file->sections->level;
-  limit = conf.contents_depth[(level > 5) ? (5) : (level)];
-
-  start_level = level;
-  if (file->is_leaf)
-    start_level = level - 1;
-  last_level = start_level;
-
-  count = xhtml_do_contents_limit(fp, file->child, limit);
-
-  /* Close any lists still open from the entries just written. */
-  if (fp != NULL)
-  {
-    for (; last_level > start_level; last_level--)
-    {
-      fprintf(fp, "</ul>\n");
-      if (chm_toc)
-        fprintf(chm_toc, "</ul>\n");
-    }
-  }
-  return count;
-}
-
-/*
- * Write contents entries for `file', its later siblings and all of
- * their descendants, down to the contents depth `limit'. Returns the
- * number of entries.
- */
-static int xhtml_do_contents_limit(FILE * fp, xhtmlfile * file, int limit)
-{
-  xhtmlfile *cur;
-  int total = 0;
-
-  for (cur = file; cur; cur = cur->next)
-  {
-    total += xhtml_do_contents_section_limit(fp, cur->sections, limit);
-    total += xhtml_do_contents_limit(fp, cur->child, limit);
-  }
-  return total;
-}
-
-/*
- * Write contents entries for `section', its later siblings and their
- * children, down to the contents depth `limit'. As soon as one entry is
- * refused the walk stops and 0 is returned; otherwise the number of
- * entries is returned.
- */
-static int
-xhtml_do_contents_section_deep_limit(FILE * fp, xhtmlsection * section,
-                                     int limit)
-{
-  int total = 0;
-
-  for (; section; section = section->next)
-  {
-    if (!xhtml_add_contents_entry(fp, section, limit))
-      return 0;
-    total += 1;
-    total += xhtml_do_contents_section_deep_limit(fp, section->child, limit);
-  }
-  return total;
-}
-
-/*
- * Write a contents entry for `section' itself and then for the tree of
- * its children, down to the contents depth `limit'. The section itself
- * is always counted, whether or not its entry was accepted. Returns 0
- * for a NULL section.
- */
-static int
-xhtml_do_contents_section_limit(FILE * fp, xhtmlsection * section, int limit)
-{
-  if (section == NULL)
-    return 0;
-
-  xhtml_add_contents_entry(fp, section, limit);
-  return 1 + xhtml_do_contents_section_deep_limit(fp, section->child, limit);
-}
-
-/*
- * Add one contents entry for `section'. Returns FALSE when the section
- * is NULL or deeper than `limit', TRUE otherwise. With fp == NULL, or
- * for a section of negative level, nothing is written.
- *
- * Lists are opened and closed as needed to move from last_level to the
- * section's level. The same entry is mirrored into the CHM contents
- * file and the CHM index file when those are open; the CHM index
- * receives only the section's words, not its keyword text.
- */
-static int
-xhtml_add_contents_entry(FILE * fp, xhtmlsection * section, int limit)
-{
-  paragraph *para;
-  const char *anchor;
-  int is_chapter, is_major, wants_kwtext;
-
-  if (!section || section->level > limit)
-    return FALSE;
-  if (fp == NULL || section->level < 0)
-    return TRUE;
-
-  for (; last_level > section->level; last_level--)
-  {
-    fprintf(fp, "</ul>\n");
-    if (chm_toc)
-      fprintf(chm_toc, "</ul>\n");
-  }
-  for (; last_level < section->level; last_level++)
-  {
-    fprintf(fp, "<ul>\n");
-    if (chm_toc)
-      fprintf(chm_toc, "<ul>\n");
-  }
-
-  para = section->para;
-  is_chapter = (para->type == para_Chapter);
-  is_major = (is_chapter || para->type == para_Appendix);
-  /* chapters are linked without a fragment */
-  anchor = is_chapter ? "" : section->fragment;
-
-  fprintf(fp, "<li>");
-  fprintf(fp, "<a %shref=\"%s#%s\">",
-          is_major ? "class=\"btitle\" " : "",
-          section->file->filename, anchor);
-  if (chm_toc)
-    fprintf(chm_toc, "<li><OBJECT type=\"text/sitemap\"><param name=\"Local\" value=\"%s#%s\"><param name=\"Name\" value=\"",
-            section->file->filename, anchor);
-  if (chm_ind)
-    fprintf(chm_ind, "<li><OBJECT type=\"text/sitemap\"><param name=\"Local\" value=\"%s#%s\"><param name=\"Name\" value=\"",
-            section->file->filename, anchor);
-
-  if (is_major)
-    fprintf(fp, "<b>");
-
-  /*
-   * Keyword text is shown for everything except headings and
-   * subsections, and also for those when they have no words of their own.
-   */
-  wants_kwtext = (para->type != para_Heading && para->type != para_Subsect)
-      || (para->kwtext && !para->words);
-  if (wants_kwtext)
-  {
-    xhtml_para(fp, para->kwtext);
-    if (chm_toc)
-      xhtml_para(chm_toc, para->kwtext);
-    if (para->words)
-    {
-      fprintf(fp, ": ");
-      if (chm_toc)
-        fprintf(chm_toc, ": ");
-    }
-  }
-
-  if (is_major)
-    fprintf(fp, "</b>");
-
-  if (para->words)
-  {
-    xhtml_para(fp, para->words);
-    if (chm_toc)
-      xhtml_para(chm_toc, para->words);
-    if (chm_ind)
-      xhtml_para(chm_ind, para->words);
-  }
-
-  fprintf(fp, "</a></li>\n");
-  if (chm_toc)
-    fprintf(chm_toc, "\"></OBJECT></li>\n");
-  if (chm_ind)
-    fprintf(chm_ind, "\"></OBJECT></li>\n");
-  return TRUE;
-}
-
-/*
- * Write every section in the list `sections': for each one, its own
- * paragraphs first, then its children (recursively), then on to the
- * next sibling. `currentsection' tracks the section being written.
- */
-static void xhtml_do_sections(FILE * fp, xhtmlsection * sections)
-{
-  xhtmlsection *cur;
-
-  for (cur = sections; cur; cur = cur->next)
-  {
-    currentsection = cur;
-    xhtml_do_paras(fp, cur->para);
-    xhtml_do_sections(fp, cur->child);
-  }
-}
-
-/* Write this list of paragraphs. Close off all lists at the end.
- *
- * The paragraph `p' itself is always processed; the walk then continues
- * through the following paragraphs for as long as xhtml_para_level()
- * returns -1 for them, and stops at the first one that has any other
- * level, or at the end of the list. A NULL `p' writes nothing.
- *
- * Bullet, numbered-list and cited-bibliography paragraphs are wrapped in
- * <ul>, <ol> and <dl>. The list is opened when the previous paragraph
- * was of a different type and closed by looking one paragraph ahead.
- */
-static void xhtml_do_paras(FILE * fp, paragraph * p)
-{
- int last_type = -1, first = TRUE; /* last_type: type of the previous paragraph; first: forces the first paragraph through the loop test */
- if (!p)
- return;
-
-/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/
- for (; p && (xhtml_para_level(p) == -1 || first); p = p->next)
- {
- first = FALSE;
- switch (p->type)
- {
- /*
- * Things we ignore because we've already processed them or
- * aren't going to touch them in this pass.
- */
- case para_IM:
- case para_BR:
- case para_Biblio: /* only touch BiblioCited */
- case para_VersionID:
- case para_Copyright:
- case para_Preamble:
- case para_NoCite:
- case para_Title:
- break;
-
- /*
- * Chapter titles.
- */
- case para_Chapter:
- case para_Appendix:
- case para_UnnumberedChapter:
- xhtml_heading(fp, p);
- break;
-
- case para_Heading:
- case para_Subsect:
- xhtml_heading(fp, p);
- break;
-
- case para_Rule:
- fprintf(fp, "\n<hr />\n");
- break;
-
- case para_Normal:
- fprintf(fp, "\n<p>");
- xhtml_para(fp, p->words);
- fprintf(fp, "</p>\n");
- break;
-
- case para_Bullet:
- case para_NumberedList:
- case para_BiblioCited:
- if (last_type != p->type)
- {
- /* start up list if necessary */
- if (p->type == para_Bullet)
- {
- fprintf(fp, "<ul>\n");
- } else if (p->type == para_NumberedList)
- {
- fprintf(fp, "<ol>\n");
- } else if (p->type == para_BiblioCited)
- {
- fprintf(fp, "<dl>\n");
- }
- }
- if (p->type == para_Bullet || p->type == para_NumberedList)
- fprintf(fp, "<li>");
- else if (p->type == para_BiblioCited)
- {
- fprintf(fp, "<dt>"); /* the keyword text becomes the term, the words the definition */
- xhtml_para(fp, p->kwtext);
- fprintf(fp, "</dt>\n<dd>");
- }
- xhtml_para(fp, p->words);
- if (p->type == para_BiblioCited)
- {
- fprintf(fp, "</dd>\n");
- } else if (p->type == para_Bullet || p->type == para_NumberedList)
- {
- fprintf(fp, "</li>");
- }
- if (p->type == para_Bullet || p->type == para_NumberedList
- || p->type == para_BiblioCited)
- /* close off list if necessary */
- {
- paragraph *p2 = p->next; /* look-ahead: the paragraph that would come next */
- int close_off = FALSE;
-/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/
- if (p2 && xhtml_para_level(p2) == -1)
- {
- if (p2->type != p->type) /* next paragraph is still part of this walk but of another type */
- close_off = TRUE;
- } else
- {
- close_off = TRUE; /* no next paragraph, or the walk stops there */
- }
- if (close_off)
- {
- if (p->type == para_Bullet)
- {
- fprintf(fp, "</ul>\n");
- } else if (p->type == para_NumberedList)
- {
- fprintf(fp, "</ol>\n");
- } else if (p->type == para_BiblioCited)
- {
- fprintf(fp, "</dl>\n");
- }
- }
- }
- break;
-
- case para_Code:
- xhtml_codepara(fp, p->words);
- break;
- }
- last_type = p->type;
- }
-}
-
-/*
- * Write the start of an XHTML file: doctype, <head> with the title
- * ("Documentation" when `title' is NULL), generator / author /
- * description meta tags and any configured extra head content, then
- * the opening <body> tag (or its configured replacement) and any
- * configured text for the start of the body.
- */
-static void xhtml_doheader(FILE * fp, word * title)
-{
-  fputs("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n",
-        fp);
-  fputs("\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n",
-        fp);
-  fputs("<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>",
-        fp);
-
-  if (title != NULL)
-    xhtml_para(fp, title);
-  else
-    fputs("Documentation", fp);
-  fputs("</title>\n", fp);
-
-  fprintf(fp,
-          "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n",
-          version);
-  if (conf.author)
-    fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author);
-  if (conf.description)
-    fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n",
-            conf.description);
-  if (conf.head_end)
-    fprintf(fp, "%ls\n", conf.head_end);
-  fputs("</head>\n\n", fp);
-
-  /* a configured body tag replaces the plain one */
-  if (!conf.body)
-    fputs("<body>\n", fp);
-  else
-    fprintf(fp, "%ls\n", conf.body);
-
-  if (conf.body_start)
-    fprintf(fp, "%ls\n", conf.body_start);
-}
-
-/*
- * Start a CHM sitemap file: opens the outer list and writes a first
- * entry named after `title' that points at Contents.html.
- */
-static void chm_doheader(FILE * fp, word * title)
-{
-  fputs("<HTML><BODY><UL><LI><OBJECT type=\"text/sitemap\"><param name=\"Name\" value=\"",
-        fp);
-  xhtml_para(fp, title);
-  fputs("\"><param name=\"Local\" value=\"Contents.html\"></OBJECT></li>\n",
-        fp);
-}
-
-/*
- * Write the end of an XHTML file: a rule, any configured end-of-body
- * text, the <address> block (unless suppressed) holding the configured
- * address text and, when enabled, every version ID paragraph of the
- * document, and finally the closing </body> and </html> tags.
- */
-static void xhtml_dofooter(FILE * fp)
-{
-  fputs("\n<hr />\n\n", fp);
-  if (conf.body_end)
-    fprintf(fp, "%ls\n", conf.body_end);
-
-  if (!conf.suppress_address)
-  {
-    fputs("<address>\n", fp);
-    if (conf.address_start)
-      fprintf(fp, "%ls\n", conf.address_start);
-
-    /* Do the version ID */
-    if (conf.include_version_id)
-    {
-      paragraph *para;
-      int seen = 0;             /* non-zero once one ID has been written */
-
-      for (para = sourceparas; para; para = para->next)
-      {
-        if (para->type != para_VersionID)
-          continue;
-        xhtml_versionid(fp, para->words, seen);
-        seen = 1;
-      }
-    }
-
-    if (conf.address_end)
-      fprintf(fp, "%ls\n", conf.address_end);
-    fputs("</address>\n", fp);
-  }
-
-  fputs("</body>\n\n</html>\n", fp);
-}
-/* Finish a CHM sitemap file by closing the list, body and document. */
-static void chm_dofooter(FILE * fp)
-{
-  fputs("</ul></BODY></HTML>\n", fp);
-}
-
-/*
- * Output the versionid paragraph. Typically this is a version control
- * ID string (such as $Id...$ in RCS).
- *
- * The text is written inside square brackets on a line of its own.
- * `started' is non-zero when an earlier version ID has already been
- * written; a line break is then emitted first. The break is written
- * as <br /> because the output is declared as XHTML, where an
- * unclosed <br> is not well-formed.
- */
-static void xhtml_versionid(FILE * fp, word * text, int started)
-{
-  rdstringc t = { 0, 0, NULL };
-
-  rdaddc(&t, '[');              /* FIXME: configurability */
-  xhtml_rdaddwc(&t, text, NULL);
-  rdaddc(&t, ']');              /* FIXME: configurability */
-
-  if (started)
-    fprintf(fp, "<br />\n");
-  fprintf(fp, "%s\n", t.text);
-  sfree(t.text);
-}
-
-/* Report whether `c' is one of the characters that must be escaped in
- * XHTML text: ampersand, less-than, greater-than or double quote.
- */
-static int xhtml_reservedchar(int c)
-{
-  switch (c)
-  {
-  case '&':
-  case '<':
-  case '>':
-  case '"':
-    return TRUE;
-  default:
-    return FALSE;
-  }
-}
-
-/*
- * Convert a wide string into valid XHTML: the reserved characters
- * & " < > are written as the entities &amp; &quot; &lt; &gt;, and
- * anything else outside printable ASCII is written as a numeric
- * character reference. Currently we don't worry about constraining the
- * encoded character set, which we should probably do at some point (we
- * can still fix up and return FALSE - see the last comment here).
- *
- * Because this is only used for words, spaces are HARD spaces (any other
- * spaces will be word_Whitespace not word_Normal). So they become &nbsp;
- * Unless hard_spaces is FALSE, of course (code paragraphs break the above
- * rule).
- *
- * A '<' or '>' that directly follows two backslashes is written
- * unescaped, and replaces those two backslashes in the output.
- *
- * If `result' is non-NULL, mallocs the resulting string and stores a pointer to
- * it in `*result'. If `result' is NULL, merely checks whether all
- * characters in the string are feasible.
- *
- * Return is nonzero if all characters are OK. If not all
- * characters are OK but `result' is non-NULL, a result _will_
- * still be generated! (At present the return value is always TRUE.)
- */
-static int xhtml_convert(wchar_t * s, char **result, int hard_spaces)
-{
-  int doing = (result != 0);
-  int ok = TRUE;
-  char *p = NULL;
-  int plen = 0, psize = 0;
-
-  for (; *s; s++)
-  {
-    wchar_t c = *s;
-
-#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); }
-
-    if (((c == 32 && !hard_spaces)
-         || (c > 32 && c <= 126 && !xhtml_reservedchar(c))))
-    {
-      /* Char is OK. */
-      if (doing)
-      {
-        ensure_size(plen);
-        p[plen++] = (char) c;
-      }
-    } else
-    {
-      /* Char needs fixing up. */
-      /* ok = FALSE; -- currently we never return FALSE; we
-       * might want to when considering a character set for the
-       * encoded document.
-       */
-      if (doing)
-      {
-        /*
-         * Each ensure_size() below leaves room for the entity plus the
-         * NUL that sprintf() writes, which is subsequently stomped on.
-         */
-        switch (c)
-        {
-        case 32:               /* a space in a word is a hard space */
-          ensure_size(plen + 7);
-          plen += sprintf(p + plen, "&nbsp;");
-          break;
-        case '&':
-          ensure_size(plen + 6);
-          plen += sprintf(p + plen, "&amp;");
-          break;
-        case '"':
-          ensure_size(plen + 7);
-          plen += sprintf(p + plen, "&quot;");
-          break;
-        case '<':
-        case '>':
-          if (plen > 1 && *(s - 1) == '\\' && *(s - 2) == '\\')
-          {
-            /*
-             * Escaped by two backslashes: drop one output byte and
-             * overwrite the other with the raw character. The buffer
-             * only shrinks, so no resize is needed.
-             */
-            plen--;
-            p[plen - 1] = (char) c;
-            p[plen] = 0;
-          } else
-          {
-            ensure_size(plen + 5);
-            plen += sprintf(p + plen, "%s", (c == '<') ? "&lt;" : "&gt;");
-          }
-          break;
-        default:
-          /*
-           * "&#" + up to 11 characters for an int + ";" + NUL is at
-           * most 15 bytes. Reserving only 9 overflowed the buffer for
-           * code points of 100000 and above.
-           */
-          ensure_size(plen + 16);
-          plen += sprintf(p + plen, "&#%04i;", (int) c);
-          break;
-        }
-      }
-    }
-  }
-  if (doing)
-  {
-    p = resize(p, plen + 1);
-    p[plen] = '\0';
-    *result = p;
-  }
-
-  return ok;
-}
-
-/*
- * This formats the given words as XHTML.
- *
- * Appends the rendering of the word list starting at `text' to `rs',
- * stopping at `end' (exclusive) or at the end of the list when `end'
- * is NULL.
- *
- * Hyperlinks and cross-references open an <a> element that the matching
- * end word closes. Index references write nothing, but record
- * `currentsection' in the backend data of every index entry the tag
- * refers to. Text words are escaped through xhtml_convert() and wrapped
- * in <em> or <code> according to their style. A word starting with a
- * backslash that is followed by a word starting with one of & < > "
- * is dropped, and that following word is copied through unescaped.
- */
-static void xhtml_rdaddwc(rdstringc * rs, word * text, word * end)
-{
-  char *c;
-  keyword *kwl;
-  xhtmlsection *sect;
-  indextag *itag;
-  int ti;
-  wchar_t *s;
-
-  for (; text && text != end; text = text->next)
-  {
-    switch (text->type)
-    {
-    case word_HyperLink:
-      xhtml_utostr(text->text, &c);
-      rdaddsc(rs, "<a href=\"");
-      /* while a CHM contents file is open, a leading ".." loses its first dot */
-      if (chm_toc && *c == '.' && *(c + 1) == '.')
-        rdaddsc(rs, c + 1);
-      else
-        rdaddsc(rs, c);
-      rdaddsc(rs, "\">");
-      sfree(c);
-      break;
-
-    case word_LocalHyperLink:
-      xhtml_utostr(text->text, &c);
-      rdaddsc(rs, "<a href=\"");
-      if (conf.rlink_prefix)
-      {
-        char *c2;
-        xhtml_utostr(conf.rlink_prefix, &c2);
-        rdaddsc(rs, c2);
-        sfree(c2);
-      }
-      rdaddsc(rs, c);
-      if (conf.rlink_suffix)
-      {
-        char *c2;
-        xhtml_utostr(conf.rlink_suffix, &c2);
-        rdaddsc(rs, c2);
-        sfree(c2);
-      }
-      rdaddsc(rs, "\">");
-      sfree(c);
-      break;
-
-    case word_UpperXref:
-    case word_LowerXref:
-    case word_FreeTextXref:
-      kwl = kw_lookup(keywords, text->text);
-      if (kwl)
-      {
-        sect = xhtml_find_section(kwl->para);
-        if (sect)
-        {
-          rdaddsc(rs, "<a href=\"");
-          rdaddsc(rs, sect->file->filename);
-          rdaddc(rs, '#');
-          rdaddsc(rs, sect->fragment);
-          rdaddsc(rs, "\">");
-        } else
-        {
-          rdaddsc(rs,
-                  "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->");
-          error(err_whatever,
-                "Couldn't locate cross-reference! (Probably a bibliography entry.)");
-        }
-      } else
-      {
-        rdaddsc(rs,
-                "<a href=\"Apologies.html\"><!-- unknown cross-reference -->");
-        error(err_whatever,
-              "Couldn't locate cross-reference! (Wasn't in source file.)");
-      }
-      break;
-
-    case word_IndexRef:        /* in theory we could make an index target here */
-      /* what we _do_ need to do is to fix up the backend data
-       * for any indexentry this points to.
-       */
-      for (ti = 0;
-           (itag = (indextag *) index234(idx->tags, ti)) != NULL; ti++)
-      {
-        /* FIXME: really ustricmp() and not ustrcmp()? */
-        if (ustricmp(itag->name, text->text) == 0)
-        {
-          break;
-        }
-      }
-      if (itag != NULL)
-      {
-        if (itag->refs != NULL)
-        {
-          int i;
-          for (i = 0; i < itag->nrefs; i++)
-          {
-            xhtmlindex *idx_ref;
-            indexentry *ientry;
-
-            ientry = itag->refs[i];
-            if (ientry->backend_data == NULL)
-            {
-              /* first reference to this entry: start a small section list */
-              idx_ref = (xhtmlindex *) smalloc(sizeof(xhtmlindex));
-              if (idx_ref == NULL)
-                fatal(err_nomemory);
-              idx_ref->nsection = 0;
-              idx_ref->size = 4;
-              idx_ref->sections =
-                  (xhtmlsection **) smalloc(idx_ref->size *
-                                            sizeof(xhtmlsection *));
-              if (idx_ref->sections == NULL)
-                fatal(err_nomemory);
-              ientry->backend_data = idx_ref;
-            } else
-            {
-              idx_ref = ientry->backend_data;
-              if (idx_ref->nsection + 1 > idx_ref->size)
-              {
-                /*
-                 * Grow the list by doubling. The elements are
-                 * pointers, so size by sizeof(xhtmlsection *), not by
-                 * the size of a whole section structure.
-                 */
-                int new_size = idx_ref->size * 2;
-                idx_ref->sections =
-                    srealloc(idx_ref->sections,
-                             new_size * sizeof(xhtmlsection *));
-                if (idx_ref->sections == NULL)
-                {
-                  fatal(err_nomemory);
-                }
-                idx_ref->size = new_size;
-              }
-            }
-            idx_ref->sections[idx_ref->nsection++] = currentsection;
-          }
-        } else
-        {
-          fatal(err_whatever, "Index tag had no entries!");
-        }
-      } else
-      {
-        fprintf(stderr, "Looking for index entry '%ls'\n", text->text);
-        fatal(err_whatever,
-              "Couldn't locate index entry! (Wasn't in index.)");
-      }
-      break;
-
-    case word_HyperEnd:
-    case word_XrefEnd:
-      rdaddsc(rs, "</a>");
-      break;
-
-    case word_Normal:
-    case word_Emph:
-    case word_Code:
-    case word_WeakCode:
-    case word_WhiteSpace:
-    case word_EmphSpace:
-    case word_CodeSpace:
-    case word_WkCodeSpace:
-    case word_Quote:
-    case word_EmphQuote:
-    case word_CodeQuote:
-    case word_WkCodeQuote:
-      assert(text->type != word_CodeQuote &&
-             text->type != word_WkCodeQuote);
-      /* open the style element on the first (or only) word of a run */
-      if (towordstyle(text->type) == word_Emph &&
-          (attraux(text->aux) == attr_First ||
-           attraux(text->aux) == attr_Only))
-        rdaddsc(rs, "<em>");
-      else if ((towordstyle(text->type) == word_Code
-                || towordstyle(text->type) == word_WeakCode)
-               && (attraux(text->aux) == attr_First
-                   || attraux(text->aux) == attr_Only))
-        rdaddsc(rs, "<code>");
-
-      if (removeattr(text->type) == word_Normal)
-      {
-        /* set by a backslash word: copy the next word through unescaped */
-        static int dont_convert = 0;
-        if (dont_convert)
-        {
-          char buf[2] = " ";
-          dont_convert = 0;
-          s = text->text;
-          for (; *s; s++)
-          {
-            buf[0] = (char) *s;
-            rdaddsc(rs, buf);
-          }
-          buf[0] = 0;
-          rdaddsc(rs, buf);
-        } else
-        {
-          if (*text->text == '\\' && text->next
-              && text->next->text && (*text->next->text == '&'
-                                      || *text->next->text == '<'
-                                      || *text->next->text == '>'
-                                      || *text->next->text == '"'))
-            dont_convert = 1;
-          else
-          {
-            if (xhtml_convert(text->text, &c, TRUE))    /* spaces in the word are hard */
-              rdaddsc(rs, c);
-            else
-              xhtml_rdaddwc(rs, text->alt, NULL);
-            sfree(c);
-          }
-        }
-      } else if (removeattr(text->type) == word_WhiteSpace)
-      {
-        rdaddc(rs, ' ');
-      } else if (removeattr(text->type) == word_Quote)
-      {
-        rdaddsc(rs, "&quot;");
-      }
-
-      /* close the style element on the last (or only) word of a run */
-      if (towordstyle(text->type) == word_Emph &&
-          (attraux(text->aux) == attr_Last ||
-           attraux(text->aux) == attr_Only))
-        rdaddsc(rs, "</em>");
-      else if ((towordstyle(text->type) == word_Code
-                || towordstyle(text->type) == word_WeakCode)
-               && (attraux(text->aux) == attr_Last
-                   || attraux(text->aux) == attr_Only))
-        rdaddsc(rs, "</code>");
-      break;
-    }
-  }
-}
-
-/* Output a heading, formatted as XHTML: a named anchor followed by an
- * <hN> element.
- *
- * The anchor name is the section's fragment, "title" for the document
- * title, or empty (with an error reported) when the section cannot be
- * found. Depending on the heading format for this level, the heading
- * text is prefixed with either the numeric prefix (kwtext2) or the
- * textual prefix (kwtext), followed by the format's number suffix.
- */
-static void xhtml_heading(FILE * fp, paragraph * p)
-{
-  rdstringc t = { 0, 0, NULL };
-  int level = xhtml_para_level(p);
-  xhtmlsection *sect = xhtml_find_section(p);
-  xhtmlheadfmt *fmt;
-  word *prefix = NULL;
-  char *fragment;
-
-  if (sect)
-    fragment = sect->fragment;
-  else if (p->type == para_Title)
-    fragment = "title";
-  else
-  {
-    fragment = "";              /* FIXME: what else can we do? */
-    error(err_whatever, "Couldn't locate heading cross-reference!");
-  }
-
-  /* Pick the heading format; the title has none. */
-  if (p->type == para_Title)
-    fmt = NULL;
-  else if (level == 1)
-    fmt = &conf.fchapter;
-  else if (level - 1 < conf.nfsect)
-    fmt = &conf.fsect[level - 1];
-  else
-    fmt = &conf.fsect[conf.nfsect - 1];
-
-  if (fmt)
-    prefix = fmt->just_numbers ? p->kwtext2 : p->kwtext;
-
-  if (prefix)
-  {
-    char *suffix;
-
-    xhtml_rdaddwc(&t, prefix, NULL);
-    if (xhtml_convert(fmt->number_suffix, &suffix, FALSE))
-    {
-      rdaddsc(&t, suffix);
-      sfree(suffix);
-    }
-  }
-  xhtml_rdaddwc(&t, p->words, NULL);
-
-  /*
-   * In single-file mode every heading drops one level, because the
-   * document title already occupies <h1>. The title itself comes back
-   * from xhtml_para_level() as level zero, so it is always bumped to
-   * one whatever leaf_level is.
-   */
-  if (conf.leaf_level == 0 || level == 0)
-    level++;
-
-  fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level,
-          t.text, level);
-  sfree(t.text);
-}
-
-/* Output a paragraph. Styles are handled by xhtml_rdaddwc().
- *
- * Writes the XHTML rendering of the word list `text' to `fp'. When the
- * rendering produced no text at all (for instance a NULL word list)
- * the buffer is still NULL, so nothing is written: passing NULL to a
- * "%s" conversion is undefined behaviour.
- */
-static void xhtml_para(FILE * fp, word * text)
-{
-  rdstringc out = { 0, 0, NULL };
-
-  xhtml_rdaddwc(&out, text, NULL);
-  if (out.text != NULL)
-  {
-    fprintf(fp, "%s", out.text);
-    sfree(out.text);
-  }
-}
-
-/* Output a code paragraph as a <pre> block. Each word of type
- * word_WeakCode becomes one line, escaped with ordinary (not hard)
- * spaces; words of any other type are skipped.
- */
-static void xhtml_codepara(FILE * fp, word * text)
-{
-  word *w;
-
-  fprintf(fp, "<pre>");
-  for (w = text; w; w = w->next)
-  {
-    char *line;
-
-    if (w->type != word_WeakCode)
-      continue;
-
-    xhtml_convert(w->text, &line, FALSE);
-    fprintf(fp, "%s\n", line);
-    sfree(line);
-  }
-  fprintf(fp, "</pre>\n");
-}
+/* + * xhtml backend for Halibut + * (initial implementation by James Aylett) + * + * Still to do: + * + * +++ doesn't handle non-breaking hyphens. Not sure how to yet. + * +++ entity names (from a file -- ideally supply normal SGML files) + * +++ configuration directive to file split where the current layout + * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), + * perhaps others. + * + * Limitations: + * + * +++ biblio/index references target the nearest section marker, rather + * than having a dedicated target themselves. In large bibliographies + * this will cause problems. (The solution is to fake up a response + * from xhtml_find_section(), probably linking it into the sections + * chain just in case we need it again, and to make freeing it up + * easier.) docsrc.pl used to work as we do, however, and SGT agrees that + * this is acceptable for now. + * +++ can't cope with leaf-level == 0. It's all to do with the + * top-level file not being normal, probably not even having a valid + * section level, and stuff like that. I question whether this is an + * issue, frankly; small manuals that fit on one page should probably + * not be written in halibut at all. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include "halibut.h" + +struct xhtmlsection_Struct { + struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ + struct xhtmlsection_Struct *child; /* NULL if split across files */ + struct xhtmlsection_Struct *parent; /* NULL if split across files */ + struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ + paragraph *para; + struct xhtmlfile_Struct *file; /* which file is this a part of? 
*/ + char *fragment; /* fragment id within the file */ + int level; +}; + +struct xhtmlfile_Struct { + struct xhtmlfile_Struct *next; + struct xhtmlfile_Struct *child; + struct xhtmlfile_Struct *parent; + char *filename; + struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */ + int is_leaf; /* is this file a leaf file, ie does it not have any children? */ +}; + +typedef struct xhtmlsection_Struct xhtmlsection; +typedef struct xhtmlfile_Struct xhtmlfile; +typedef struct xhtmlindex_Struct xhtmlindex; + +struct xhtmlindex_Struct { + int nsection; + int size; + xhtmlsection **sections; +}; + +typedef struct { + int just_numbers; + wchar_t *number_suffix; +} xhtmlheadfmt; + +typedef struct { + int contents_depth[6]; + int leaf_contains_contents; + int leaf_level; + int leaf_smallest_contents; + int include_version_id; + wchar_t *author, *description; + wchar_t *head_end, *body, *body_start, *body_end, *address_start, + *address_end, *nav_attrs; + wchar_t *rlink_prefix, *rlink_suffix; + wchar_t *chm_toc_file, *chm_ind_file; + int suppress_address; + xhtmlheadfmt fchapter, *fsect; + int nfsect; +} xhtmlconfig; + +/*static void xhtml_level(paragraph *, int); +static void xhtml_level_0(paragraph *); +static void xhtml_docontents(FILE *, paragraph *, int); +static void xhtml_dosections(FILE *, paragraph *, int); +static void xhtml_dobody(FILE *, paragraph *, int);*/ + +static void xhtml_doheader(FILE *, word *); +static void xhtml_dofooter(FILE *); +static void xhtml_versionid(FILE *, word *, int); + +static void xhtml_utostr(wchar_t *, char **); +static int xhtml_para_level(paragraph *); +static int xhtml_reservedchar(int); + +static int xhtml_convert(wchar_t *, char **, int); +static void xhtml_rdaddwc(rdstringc *, word *, word *); +static void xhtml_para(FILE *, word *); +static void xhtml_codepara(FILE *, word *); +static void xhtml_heading(FILE *, paragraph *); + +static void chm_doheader(FILE *, word *); +static void 
chm_dofooter(FILE *); +/* File-global variables are much easier than passing these things + * all over the place. Evil, but easier. We can replace this with a single + * structure at some point. + */ +static xhtmlconfig conf; +static keywordlist *keywords; +static indexdata *idx; +static xhtmlfile *topfile; +static xhtmlsection *topsection; +static paragraph *sourceparas; +static xhtmlfile *lastfile; +static xhtmlfile *xhtml_last_file = NULL; +static int last_level = -1; +static xhtmlsection *currentsection; +static FILE* chm_toc = NULL; +static FILE* chm_ind = NULL; + + +static xhtmlconfig xhtml_configure(paragraph * source) +{ + xhtmlconfig ret; + + /* + * Defaults. + */ + ret.contents_depth[0] = 2; + ret.contents_depth[1] = 3; + ret.contents_depth[2] = 4; + ret.contents_depth[3] = 5; + ret.contents_depth[4] = 6; + ret.contents_depth[5] = 7; + ret.leaf_level = 2; + ret.leaf_smallest_contents = 4; + ret.leaf_contains_contents = FALSE; + ret.include_version_id = TRUE; + ret.author = NULL; + ret.description = NULL; + ret.head_end = NULL; + ret.body = NULL; + ret.body_start = NULL; + ret.body_end = NULL; + ret.address_start = NULL; + ret.address_end = NULL; + ret.nav_attrs = NULL; + ret.suppress_address = FALSE; + ret.chm_toc_file = NULL; + ret.chm_ind_file = NULL; + chm_toc = NULL; + chm_ind = NULL; + ret.fchapter.just_numbers = FALSE; + ret.fchapter.number_suffix = ustrdup(L": "); + ret.nfsect = 2; + ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect); + ret.fsect[0].just_numbers = FALSE; + ret.fsect[0].number_suffix = ustrdup(L": "); + ret.fsect[1].just_numbers = TRUE; + ret.fsect[1].number_suffix = ustrdup(L" "); + ret.rlink_prefix = NULL; + ret.rlink_suffix = NULL; + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) + { + ret.contents_depth[0] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) + { + ret.contents_depth[1] = 
utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) + { + ret.contents_depth[2] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) + { + ret.contents_depth[3] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) + { + ret.contents_depth[4] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) + { + ret.contents_depth[5] = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) + { + ret.leaf_level = utoi(uadv(source->keyword)); + } else + if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) + { + ret.leaf_smallest_contents = utoi(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-versionid")) + { + ret.include_version_id = utob(uadv(source->keyword)); + } else + if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) + { + ret.leaf_contains_contents = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) + { + ret.suppress_address = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-author")) + { + ret.author = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"chm-toc-file")) + { + ret.chm_toc_file = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"chm-ind-file")) + { + ret.chm_ind_file = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-description")) + { + ret.description = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-head-end")) + { + ret.head_end = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-start")) + { + ret.body_start = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) + { + ret.body = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-body-end")) + { + ret.body_end = 
uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-address-start")) + { + ret.address_start = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-address-end")) + { + ret.address_end = uadv(source->keyword); + } else + if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) + { + ret.nav_attrs = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) + { + ret.fchapter.just_numbers = utob(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) + { + ret.fchapter.number_suffix = ustrdup(uadv(source->keyword)); + } else if (!ustricmp(source->keyword, L"xhtml-rlink-prefix")) + { + ret.rlink_prefix = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-rlink-suffix")) + { + ret.rlink_suffix = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) + { + wchar_t *p = uadv(source->keyword); + int n = 0; + if (uisdigit(*p)) + { + n = utoi(p); + p = uadv(p); + } + if (n >= ret.nfsect) + { + int i; + ret.fsect = resize(ret.fsect, n + 1); + for (i = ret.nfsect; i <= n; i++) + ret.fsect[i] = ret.fsect[ret.nfsect - 1]; + ret.nfsect = n + 1; + } + ret.fsect[n].just_numbers = utob(p); + } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) + { + wchar_t *p = uadv(source->keyword); + int n = 0; + if (uisdigit(*p)) + { + n = utoi(p); + p = uadv(p); + } + if (n >= ret.nfsect) + { + int i; + ret.fsect = resize(ret.fsect, n + 1); + for (i = ret.nfsect; i <= n; i++) + ret.fsect[i] = ret.fsect[ret.nfsect - 1]; + ret.nfsect = n + 1; + } + ret.fsect[n].number_suffix = ustrdup(p); + } + } + } + + /* printf(" !!! leaf_level = %i\n", ret.leaf_level); + printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); + printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]); + printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); + printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); + printf(" !!! 
contentdepth-4 = %i\n", ret.contents_depth[4]); + printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); + printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents); */ + return ret; +} + +static xhtmlsection *xhtml_new_section(xhtmlsection * last) +{ + xhtmlsection *ret = mknew(xhtmlsection); + ret->next = NULL; + ret->child = NULL; + ret->parent = NULL; + ret->chain = last; + ret->para = NULL; + ret->file = NULL; + ret->fragment = NULL; + ret->level = -1; /* marker: end of chain */ + return ret; +} + +/* Returns NULL or the section that marks that paragraph */ +static xhtmlsection *xhtml_find_section(paragraph * p) +{ + xhtmlsection *ret = topsection; + if (xhtml_para_level(p) == -1) + { /* first, we back-track to a section paragraph */ + paragraph *p2 = sourceparas; + paragraph *p3 = NULL; + while (p2 && p2 != p) + { + if (xhtml_para_level(p2) != -1) + { + p3 = p2; + } + p2 = p2->next; + } + if (p3 == NULL) + { /* for some reason, we couldn't find a section before this paragraph ... ? */ + /* Note that this can happen, if you have a cross-reference to before the first chapter starts. + * So don't do that, then. 
+ */ + return NULL; + } + p = p3; + } + while (ret && ret->para != p) + { +/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ + ret = ret->chain; + } + return ret; +} + +static xhtmlfile *xhtml_new_file(xhtmlsection * sect) +{ + xhtmlfile *ret = mknew(xhtmlfile); + + ret->next = NULL; + ret->child = NULL; + ret->parent = NULL; + ret->filename = NULL; + ret->sections = sect; + ret->is_leaf = (sect != NULL && sect->level == conf.leaf_level); + if (sect == NULL) + { + if (conf.leaf_level == 0) + { /* currently unused */ +#define FILENAME_MANUAL "Manual.html" +#define FILENAME_CONTENTS "Contents.html" + ret->filename = smalloc(strlen(FILENAME_MANUAL) + 1); + sprintf(ret->filename, FILENAME_MANUAL); + } else + { + ret->filename = smalloc(strlen(FILENAME_CONTENTS) + 1); + sprintf(ret->filename, FILENAME_CONTENTS); + } + } else + { + paragraph *p = sect->para; + rdstringc fname_c = { 0, 0, NULL }; + char *c; + word *w; + for (w = (p->kwtext) ? (p->kwtext) : (p->words); w; w = w->next) + { + switch (removeattr(w->type)) + { + case word_Normal: + /*case word_Emph: + case word_Code: + case word_WeakCode: */ + xhtml_utostr(w->text, &c); + rdaddsc(&fname_c, c); + sfree(c); + break; + } + } + rdaddsc(&fname_c, ".html"); + ret->filename = rdtrimc(&fname_c); + } + /* printf(" ! new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false")); */ + return ret; +} + +/* + * Walk the tree fixing up files which are actually leaf (ie + * have no children) but aren't at leaf level, so they have the + * leaf flag set. + */ +void xhtml_fixup_layout(xhtmlfile * file) +{ + if (file->child == NULL) + { + file->is_leaf = TRUE; + } else + { + xhtml_fixup_layout(file->child); + } + if (file->next) + xhtml_fixup_layout(file->next); +} + +/* + * Create the tree structure so we know where everything goes. 
 * Method:
 *
 * Ignoring file splitting, we have three choices with each new section:
 *
 *  +-----------------+-----------------+
 *  |                 |                 |
 *  X            +----X----+           (1)
 *               |         |
 *               Y        (2)
 *               |
 *              (3)
 *
 * Y is the last section we added (currentsect).
 * If sect is the section we want to add, then:
 *
 *  (1) if sect->level < currentsect->level
 *  (2) if sect->level == currentsect->level
 *  (3) if sect->level > currentsect->level
 *
 * This requires the constraint that you never skip section numbers
 * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing).
 *
 * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change
 * more than one level at a time. Lots of asserts, and probably part of
 * the algorithm here, rely on this being true. (It currently isn't
 * enforced by halibut, however.)
 *
 * File splitting makes this harder. For instance, say we added at (3)
 * above and now need to add another section. We are splitting at level
 * 2, ie the level of Y. Z is the last section we added:
 *
 *  +-----------------+-----------------+
 *  |                 |                 |
 *  X            +----X----+           (1)
 *               |         |
 *          +----Y----+   (1)
 *          |         |
 *          Z        (2)
 *          |
 *         (3)
 *
 * The (1) case is now split; we need to search upwards to find where
 * to actually link in. The other two cases remain the same (and will
 * always be like this).
 *
 * The decision of whether to split to a new file is always made on the
 * same condition, however (is the level of this section higher than the
 * leaf_level configuration value or not).
 *
 * Treating the cases backwards:
 *
 *  (3) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *      if in the same file, currentsect->child points to sect
 *      otherwise the linking is done through the file tree (which works
 *      in more or less the same way, ie currentfile->child points to
 *      the new file)
 *
 *  (2) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *      if in the same file, currentsect->next points to sect
 *      otherwise file linking and currentfile->next points to the new
 *      file (we know that Z must have caused a new file to be created)
 *
 *  (1) same file if sect->level > conf.leaf_level, otherwise new file
 *
 *      this is actually effectively the same case as (2) here,
 *      except that we first have to travel up the sections to figure
 *      out which section this new one will be a sibling of. In doing
 *      so, we may disappear off the top of a file and have to go up
 *      to its parent in the file tree.
 *
 * On return, the file-globals topfile (root of the file tree) and
 * topsection (the LAST section created, i.e. the head of the ->chain
 * list) are set, and xhtml_fixup_layout() has been run on the tree.
 */
static void xhtml_ponder_layout(paragraph * p)
{
    xhtmlsection *lastsection;
    xhtmlsection *currentsect;
    xhtmlfile *currentfile;

    lastfile = NULL;
    /* placeholder section (level -1, no paragraph) and the top-level file */
    topsection = xhtml_new_section(NULL);
    topfile = xhtml_new_file(NULL);
    lastsection = topsection;
    currentfile = topfile;
    currentsect = topsection;

    if (conf.leaf_level == 0)
    {
        /* single-file mode: everything hangs off the top file */
        topfile->is_leaf = 1;
        topfile->sections = topsection;
        topsection->file = topfile;
    }

    for (; p; p = p->next)
    {
        int level = xhtml_para_level(p);
        if (level > 0)
        {                       /* actually a section */
            xhtmlsection *sect;
            word *w;
            char *c;
            rdstringc fname_c = { 0, 0, NULL };

            sect = xhtml_new_section(lastsection);
            lastsection = sect;
            sect->para = p;
            /* fragment id = concatenated word_Normal text of the heading */
            for (w = (p->kwtext2) ? (p->kwtext2) : (p->words); w; w = w->next)
            {                   /* kwtext2 because we want numbers only! */
                switch (removeattr(w->type))
                {
                case word_Normal:
                    /*case word_Emph:
                       case word_Code:
                       case word_WeakCode: */
                    xhtml_utostr(w->text, &c);
                    rdaddsc(&fname_c, c);
                    sfree(c);
                    break;
                }
            }
/* rdaddsc(&fname_c, ".html");*/
            sect->fragment = rdtrimc(&fname_c);
            sect->level = level;
            /* printf(" ! adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level); */

            if (level > currentsect->level)
            {                   /* case (3) */
                if (level > conf.leaf_level)
                {               /* same file */
                    assert(currentfile->is_leaf);
                    currentsect->child = sect;
                    sect->parent = currentsect;
                    sect->file = currentfile;
                    /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename); */
                    currentsect = sect;
                } else
                {               /* new file */
                    xhtmlfile *file = xhtml_new_file(sect);
                    assert(!currentfile->is_leaf);
                    currentfile->child = file;
                    sect->file = file;
                    file->parent = currentfile;
                    /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename); */
                    currentfile = file;
                    currentsect = sect;
                }
            } else if (level >= currentsect->file->sections->level)
            {
                /* Case (1) or (2) *AND* still under the section that starts
                 * the current file.
                 *
                 * I'm not convinced that this couldn't be rolled in with the
                 * final else {} leg further down. It seems a lot of effort
                 * this way.
                 */
                if (level > conf.leaf_level)
                {               /* stick within the same file */
                    assert(currentfile->is_leaf);
                    sect->file = currentfile;
                    /* climb to the sibling-to-be, staying inside this file */
                    while (currentsect && currentsect->level > level &&
                           currentsect->file == currentsect->parent->file)
                    {
                        currentsect = currentsect->parent;
                    }
                    assert(currentsect);
                    currentsect->next = sect;
                    assert(currentsect->level == sect->level);
                    sect->parent = currentsect->parent;
                    currentsect = sect;
                    /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename); */
                } else
                {               /* new file */
                    xhtmlfile *file = xhtml_new_file(sect);
                    sect->file = file;
                    currentfile->next = file;
                    file->parent = currentfile->parent;
                    file->is_leaf = (level == conf.leaf_level);
                    file->sections = sect;
                    /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename); */
                    currentfile = file;
                    currentsect = sect;
                }
            } else
            {                   /* Case (1) or (2) and we must move up the file tree first */
                /* this loop is now probably irrelevant - we know we can't connect
                 * to anything in the current file */
                while (currentsect && level < currentsect->level)
                {
                    currentsect = currentsect->parent;
                    if (currentsect)
                    {
                        /* printf(" * up one level to '%s'\n", currentsect->fragment); */
                    } else
                    {
                        /* printf(" * up one level (off top of current file)\n"); */
                    }
                }
                if (currentsect)
                {
                    /* I'm pretty sure this can now never fire */
                    assert(currentfile->is_leaf);
                    /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename); */
                    sect->file = currentfile;
                    currentsect->next = sect;
                    currentsect = sect;
                } else
                {               /* find a file we can attach to */
                    while (currentfile && currentfile->sections
                           && level < currentfile->sections->level)
                    {
                        currentfile = currentfile->parent;
                        if (currentfile)
                        {
                            /* printf(" * up one file level to '%s'\n", currentfile->filename); */
                        } else
                        {
                            /* printf(" * up one file level (off top of tree)\n"); */
                        }
                    }
                    if (currentfile)
                    {           /* new file (we had to skip up a file to
                                   get here, so we must be dealing with a
                                   level no lower than the configured
                                   leaf_level */
                        xhtmlfile *file = xhtml_new_file(sect);
                        currentfile->next = file;
                        sect->file = file;
                        file->parent = currentfile->parent;
                        file->is_leaf = (level == conf.leaf_level);
                        file->sections = sect;
                        /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename); */
                        currentfile = file;
                        currentsect = sect;
                    } else
                    {
                        fatal(err_whatever,
                              "Ran off the top trying to connect sibling: strange document.");
                    }
                }
            }
        }
    }
    topsection = lastsection;   /* get correct end of the chain */
    xhtml_fixup_layout(topfile);        /* leaf files not at leaf level marked as such */
}

static void xhtml_do_index();
static void xhtml_do_file(xhtmlfile * file);
static void xhtml_do_top_file(xhtmlfile * file, paragraph * sourceform);
static void xhtml_do_paras(FILE * fp, paragraph * p);
static int xhtml_do_contents_limit(FILE * fp, xhtmlfile * file, int limit);
static int xhtml_do_contents_section_limit(FILE * fp, xhtmlsection * section, int limit);
static int xhtml_add_contents_entry(FILE * fp, xhtmlsection * section, int limit);
static int xhtml_do_contents(FILE * fp, xhtmlfile * file);
static int xhtml_do_naked_contents(FILE * fp, xhtmlfile * file);
static void xhtml_do_sections(FILE * fp, xhtmlsection * sections);

/*
 * Do all the files in this structure: the file itself, then its
 * children, then its following siblings (pre-order).
 */
static void xhtml_do_files(xhtmlfile * file)
{
    xhtml_do_file(file);
    if (file->child)
        xhtml_do_files(file->child);
    if (file->next)
        xhtml_do_files(file->next);
}

/*
 * Free up all memory used by the file tree from 'xfile' downwards
 * (including following siblings).  Accepts NULL.
 */
static void xhtml_free_file(xhtmlfile * xfile)
{
    if (xfile == NULL)
    {
        return;
    }

    if (xfile->filename)
    {
        sfree(xfile->filename);
    }
    xhtml_free_file(xfile->child);
    xhtml_free_file(xfile->next);
    sfree(xfile);
}

/*
 * Main function.
+ */ +void +xhtml_backend(paragraph * sourceform, keywordlist * in_keywords, + indexdata * in_idx) +{ +/* int i;*/ + indexentry *ientry; + int ti; + xhtmlsection *xsect; + + sourceparas = sourceform; + conf = xhtml_configure(sourceform); + keywords = in_keywords; + idx = in_idx; + + /* Clear up the index entries backend data pointers */ + for (ti = 0; + (ientry = (indexentry *) index234(idx->entries, ti)) != NULL; ti++) + { + ientry->backend_data = NULL; + } + + xhtml_ponder_layout(sourceform); + + /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */ +/* xhtml_level_0(sourceform); + for (i=1; i<=conf.leaf_level; i++) + { + xhtml_level(sourceform, i); + }*/ + + /* new system ... (writes to *.html, but isn't fully trusted) */ + xhtml_do_top_file(topfile, sourceform); + assert(!topfile->next); /* shouldn't have a sibling at all */ + if (topfile->child) + { + xhtml_do_files(topfile->child); + xhtml_do_index(); + } + + /* release file, section, index data structures */ + xsect = topsection; + while (xsect) + { + xhtmlsection *tmp = xsect->chain; + if (xsect->fragment) + { + sfree(xsect->fragment); + } + sfree(xsect); + xsect = tmp; + } + xhtml_free_file(topfile); + for (ti = 0; + (ientry = (indexentry *) index234(idx->entries, ti)) != NULL; ti++) + { + if (ientry->backend_data != NULL) + { + xhtmlindex *xi = (xhtmlindex *) ientry->backend_data; + if (xi->sections != NULL) + { + sfree(xi->sections); + } + sfree(xi); + } + ientry->backend_data = NULL; + } + { + int i; + sfree(conf.fchapter.number_suffix); + for (i = 0; i < conf.nfsect; i++) + sfree(conf.fsect[i].number_suffix); + sfree(conf.fsect); + } +} + +static int xhtml_para_level(paragraph * p) +{ + switch (p->type) + { + case para_Title: + return 0; + break; + case para_UnnumberedChapter: + case para_Chapter: + case para_Appendix: + return 1; + break; +/* case para_BiblioCited: + return 2; + break;*/ + case para_Heading: + case para_Subsect: + return p->aux + 2; + break; + default: + return 
-1; + break; + } +} + +static char *xhtml_index_filename = "IndexPage.html"; + +/* Output the nav links for the current file. + * file == NULL means we're doing the index + */ +static void xhtml_donavlinks(FILE * fp, xhtmlfile * file) +{ + xhtmlfile *xhtml_next_file = NULL; + fprintf(fp, "<p"); + if (conf.nav_attrs != NULL) + { + fprintf(fp, " %ls>", conf.nav_attrs); + } else + { + fprintf(fp, ">"); + } + if (xhtml_last_file == NULL) + { + fprintf(fp, "Previous | "); + } else + { + fprintf(fp, "<a href='%s'>Previous</a> | ", xhtml_last_file->filename); + } + fprintf(fp, "<a href='Contents.html'>Contents</a> | "); + if (file != NULL) + { /* otherwise we're doing nav links for the index */ + if (xhtml_next_file == NULL) + xhtml_next_file = file->child; + if (xhtml_next_file == NULL) + xhtml_next_file = file->next; + if (xhtml_next_file == NULL) + xhtml_next_file = file->parent->next; + } + if (xhtml_next_file == NULL) + { + if (file == NULL) + { /* index, so no next file */ + fprintf(fp, "Next "); + } else + { + fprintf(fp, "<a href='%s'>Next</a>", xhtml_index_filename); + } + } else + { + fprintf(fp, "<a href='%s'>Next</a>", xhtml_next_file->filename); + } + fprintf(fp, "</p>\n"); +} + +/* Write out the index file */ +static void xhtml_do_index_body(FILE * fp) +{ + indexentry *y; + int ti; + + if (count234(idx->entries) == 0) + return; /* don't write anything at all */ + + fprintf(fp, "<dl>\n"); + /* iterate over idx->entries using the tree functions and display everything */ + for (ti = 0; (y = (indexentry *) index234(idx->entries, ti)) != NULL; + ti++) + { + if (y->backend_data) + { + int i; + xhtmlindex *xi; + + fprintf(fp, "<dt>"); + xhtml_para(fp, y->text); + fprintf(fp, "</dt>\n<dd>"); + + xi = (xhtmlindex *) y->backend_data; + for (i = 0; i < xi->nsection; i++) + { + xhtmlsection *sect = xi->sections[i]; + if (sect) + { + fprintf(fp, "<a href='%s#%s'>", sect->file->filename, + sect->fragment); + if (sect->para->kwtext) + { + xhtml_para(fp, 
sect->para->kwtext); + } else if (sect->para->words) + { + xhtml_para(fp, sect->para->words); + } + fprintf(fp, "</a>"); + if (i + 1 < xi->nsection) + { + fprintf(fp, ", "); + } + } + } + fprintf(fp, "</dd>\n"); + } + } + fprintf(fp, "</dl>\n"); +} +static void xhtml_do_index() +{ + word temp_word = + { NULL, NULL, word_Normal, 0, 0, L"Index", {NULL, 0, 0} }; + FILE *fp = fopen(xhtml_index_filename, "w"); + + if (fp == NULL) + fatal(err_cantopenw, xhtml_index_filename); + xhtml_doheader(fp, &temp_word); + xhtml_donavlinks(fp, NULL); + + xhtml_do_index_body(fp); + + xhtml_donavlinks(fp, NULL); + xhtml_dofooter(fp); + fclose(fp); +} + +/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */ +static void xhtml_do_file(xhtmlfile * file) +{ + FILE *fp = fopen(file->filename, "w"); + if (fp == NULL) + fatal(err_cantopenw, file->filename); + + if (file->sections->para->words) + { + xhtml_doheader(fp, file->sections->para->words); + } else if (file->sections->para->kwtext) + { + xhtml_doheader(fp, file->sections->para->kwtext); + } else + { + xhtml_doheader(fp, NULL); + } + + xhtml_donavlinks(fp, file); + + if (file->is_leaf && conf.leaf_contains_contents && + xhtml_do_contents(NULL, file) >= conf.leaf_smallest_contents) + xhtml_do_contents(fp, file); + xhtml_do_sections(fp, file->sections); + if (!file->is_leaf) + xhtml_do_naked_contents(fp, file); + + xhtml_donavlinks(fp, file); + + xhtml_dofooter(fp); + fclose(fp); + + xhtml_last_file = file; +} + +/* Output the top-level file. 
*/ +static void xhtml_do_top_file(xhtmlfile * file, paragraph * sourceform) +{ + paragraph *p; + char fname[4096]; + int done = FALSE; + + FILE *fp = fopen(file->filename, "w"); + if (fp == NULL) + fatal(err_cantopenw, file->filename); + + ustrtoa(conf.chm_toc_file, fname, 4096); + if(*fname) + { + chm_toc = fopen(fname, "w"); + if (chm_toc == NULL) + fatal(err_cantopenw, fname); + } + else + chm_toc = NULL; + + ustrtoa(conf.chm_ind_file, fname, 4096); + if(*fname){ + chm_ind = fopen(fname, "w"); + if (chm_ind == NULL) + fatal(err_cantopenw, fname); + } + else + chm_ind = NULL; + + /* Do the title -- only one allowed */ + for (p = sourceform; p && !done; p = p->next) + { + if (p->type == para_Title) + { + xhtml_doheader(fp, p->words); + if(chm_toc)chm_doheader(chm_toc, p->words); + if(chm_ind)chm_doheader(chm_ind, p->words); + done = TRUE; + } + } + if (!done) + xhtml_doheader(fp, NULL /* Eek! */ ); + + /* + * Display the title. + */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Title) + { + xhtml_heading(fp, p); + break; + } + } + + /* Do the preamble and copyright */ + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Preamble) + { + fprintf(fp, "<p>"); + xhtml_para(fp, p->words); + fprintf(fp, "</p>\n"); + } + } + for (p = sourceform; p; p = p->next) + { + if (p->type == para_Copyright) + { + fprintf(fp, "<p>"); + xhtml_para(fp, p->words); + fprintf(fp, "</p>\n"); + } + } + + xhtml_do_contents(fp, file); + xhtml_do_sections(fp, file->sections); + + /* + * Put the index in the top file if we're in single-file mode + * (leaf-level 0). 
+ */ + if (conf.leaf_level == 0 && count234(idx->entries) > 0) + { + fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n"); + xhtml_do_index_body(fp); + } + + xhtml_dofooter(fp); + if(chm_toc)chm_dofooter(chm_toc); + if(chm_ind)chm_dofooter(chm_ind); + fclose(fp); + if(chm_toc) + { + fclose(chm_toc); + chm_toc = NULL; + } + if(chm_ind) + { + fclose(chm_ind); + chm_ind = NULL; + } +} + +/* Convert a Unicode string to an ASCII one. '?' is + * used for unmappable characters. + */ +static void xhtml_utostr(wchar_t * in, char **out) +{ + int l = ustrlen(in); + int i; + *out = smalloc(l + 1); + for (i = 0; i < l; i++) + { + if (in[i] >= 32 && in[i] <= 126) + (*out)[i] = (char) in[i]; + else + (*out)[i] = '?'; + } + (*out)[i] = 0; +} + +/* + * Write contents for the given file, and subfiles, down to + * the appropriate contents depth. Returns the number of + * entries written. + */ +static int xhtml_do_contents(FILE * fp, xhtmlfile * file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections) ? (file->sections->level) : (0); + limit = conf.contents_depth[(level > 5) ? (5) : (level)]; + start_level = (file->is_leaf) ? (level - 1) : (level); + last_level = start_level; + + count += xhtml_do_contents_section_limit(fp, file->sections, limit); + count += xhtml_do_contents_limit(fp, file->child, limit); + if (fp != NULL) + { + while (last_level > start_level) + { + last_level--; + fprintf(fp, "</ul>\n"); + if(chm_toc)fprintf(chm_toc, "</ul>\n"); + } + } + return count; +} + +/* As above, but doesn't do anything in the current file */ +static int xhtml_do_naked_contents(FILE * fp, xhtmlfile * file) +{ + int level, limit, start_level, count = 0; + if (!file) + return 0; + + level = (file->sections) ? (file->sections->level) : (0); + limit = conf.contents_depth[(level > 5) ? (5) : (level)]; + start_level = (file->is_leaf) ? 
(level - 1) : (level); + last_level = start_level; + + count = xhtml_do_contents_limit(fp, file->child, limit); + if (fp != NULL) + { + while (last_level > start_level) + { + last_level--; + fprintf(fp, "</ul>\n"); + if(chm_toc)fprintf(chm_toc, "</ul>\n"); + } + } + return count; +} + +/* + * Write contents for the given file, children, and siblings, down to + * given limit contents depth. + */ +static int xhtml_do_contents_limit(FILE * fp, xhtmlfile * file, int limit) +{ + int count = 0; + while (file) + { + count += xhtml_do_contents_section_limit(fp, file->sections, limit); + count += xhtml_do_contents_limit(fp, file->child, limit); + file = file->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int +xhtml_do_contents_section_deep_limit(FILE * fp, xhtmlsection * section, + int limit) +{ + int count = 0; + while (section) + { + if (!xhtml_add_contents_entry(fp, section, limit)) + return 0; + else + count++; + count += + xhtml_do_contents_section_deep_limit(fp, section->child, limit); + section = section->next; + } + return count; +} + +/* + * Write contents entries for the given section tree, down to the + * limit contents depth. + */ +static int +xhtml_do_contents_section_limit(FILE * fp, xhtmlsection * section, int limit) +{ + int count = 0; + if (!section) + return 0; + xhtml_add_contents_entry(fp, section, limit); + count = 1; + count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); + /* section=section->child; + while (section && xhtml_add_contents_entry(fp, section, limit)) { + section = section->next; + } */ + return count; +} + +/* + * Add a section entry, unless we're exceeding the limit, in which + * case return FALSE (otherwise return TRUE). 
+ */ +static int +xhtml_add_contents_entry(FILE * fp, xhtmlsection * section, int limit) +{ + if (!section || section->level > limit) + return FALSE; + if (fp == NULL || section->level < 0) + return TRUE; + while (last_level > section->level) + { + last_level--; + fprintf(fp, "</ul>\n"); + if(chm_toc)fprintf(chm_toc, "</ul>\n"); + } + while (last_level < section->level) + { + last_level++; + fprintf(fp, "<ul>\n"); + if(chm_toc)fprintf(chm_toc, "<ul>\n"); + } + fprintf(fp, "<li>"); + fprintf(fp, "<a %shref=\"%s#%s\">", + (section->para->type == para_Chapter|| section->para->type == para_Appendix) ? "class=\"btitle\" " : "", + section->file->filename, + (section->para->type == para_Chapter) ? "" : section->fragment); + if(chm_toc)fprintf(chm_toc, "<li><OBJECT type=\"text/sitemap\"><param name=\"Local\" value=\"%s#%s\"><param name=\"Name\" value=\"", + section->file->filename, + (section->para->type == para_Chapter) ? "" : section->fragment); + if(chm_ind)fprintf(chm_ind, "<li><OBJECT type=\"text/sitemap\"><param name=\"Local\" value=\"%s#%s\"><param name=\"Name\" value=\"", + section->file->filename, + (section->para->type == para_Chapter) ? 
"" : section->fragment); + //%s + if (section->para->type == para_Chapter + || section->para->type == para_Appendix) + fprintf(fp, "<b>"); + if ((section->para->type != para_Heading + && section->para->type != para_Subsect) || (section->para->kwtext + && !section->para-> + words)) + { + xhtml_para(fp, section->para->kwtext); + if(chm_toc)xhtml_para(chm_toc, section->para->kwtext); + if (section->para->words){ + fprintf(fp, ": "); + if(chm_toc)fprintf(chm_toc, ": "); + } + } + if (section->para->type == para_Chapter + || section->para->type == para_Appendix) + fprintf(fp, "</b>"); + if (section->para->words) + { + xhtml_para(fp, section->para->words); + if(chm_toc)xhtml_para(chm_toc, section->para->words); + if(chm_ind)xhtml_para(chm_ind, section->para->words); + } + fprintf(fp, "</a></li>\n"); + if(chm_toc)fprintf(chm_toc,"\"></OBJECT></li>\n"); + if(chm_ind)fprintf(chm_ind,"\"></OBJECT></li>\n"); + return TRUE; +} + +/* + * Write all the sections in this file. Do all paragraphs in this section, then all + * children (recursively), then go on to the next one (tail recursively). + */ +static void xhtml_do_sections(FILE * fp, xhtmlsection * sections) +{ + while (sections) + { + currentsection = sections; + xhtml_do_paras(fp, sections->para); + xhtml_do_sections(fp, sections->child); + sections = sections->next; + } +} + +/* Write this list of paragraphs. Close off all lists at the end. */ +static void xhtml_do_paras(FILE * fp, paragraph * p) +{ + int last_type = -1, first = TRUE; + if (!p) + return; + +/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ + for (; p && (xhtml_para_level(p) == -1 || first); p = p->next) + { + first = FALSE; + switch (p->type) + { + /* + * Things we ignore because we've already processed them or + * aren't going to touch them in this pass. 
+ */ + case para_IM: + case para_BR: + case para_Biblio: /* only touch BiblioCited */ + case para_VersionID: + case para_Copyright: + case para_Preamble: + case para_NoCite: + case para_Title: + break; + + /* + * Chapter titles. + */ + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + xhtml_heading(fp, p); + break; + + case para_Heading: + case para_Subsect: + xhtml_heading(fp, p); + break; + + case para_Rule: + fprintf(fp, "\n<hr />\n"); + break; + + case para_Normal: + fprintf(fp, "\n<p>"); + xhtml_para(fp, p->words); + fprintf(fp, "</p>\n"); + break; + + case para_Bullet: + case para_NumberedList: + case para_BiblioCited: + if (last_type != p->type) + { + /* start up list if necessary */ + if (p->type == para_Bullet) + { + fprintf(fp, "<ul>\n"); + } else if (p->type == para_NumberedList) + { + fprintf(fp, "<ol>\n"); + } else if (p->type == para_BiblioCited) + { + fprintf(fp, "<dl>\n"); + } + } + if (p->type == para_Bullet || p->type == para_NumberedList) + fprintf(fp, "<li>"); + else if (p->type == para_BiblioCited) + { + fprintf(fp, "<dt>"); + xhtml_para(fp, p->kwtext); + fprintf(fp, "</dt>\n<dd>"); + } + xhtml_para(fp, p->words); + if (p->type == para_BiblioCited) + { + fprintf(fp, "</dd>\n"); + } else if (p->type == para_Bullet || p->type == para_NumberedList) + { + fprintf(fp, "</li>"); + } + if (p->type == para_Bullet || p->type == para_NumberedList + || p->type == para_BiblioCited) + /* close off list if necessary */ + { + paragraph *p2 = p->next; + int close_off = FALSE; +/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ + if (p2 && xhtml_para_level(p2) == -1) + { + if (p2->type != p->type) + close_off = TRUE; + } else + { + close_off = TRUE; + } + if (close_off) + { + if (p->type == para_Bullet) + { + fprintf(fp, "</ul>\n"); + } else if (p->type == para_NumberedList) + { + fprintf(fp, "</ol>\n"); + } else if (p->type == para_BiblioCited) + { + fprintf(fp, "</dl>\n"); + } + } + } + break; + + case 
para_Code: + xhtml_codepara(fp, p->words); + break; + } + last_type = p->type; + } +} + +/* + * Output a header for this XHTML file. + */ +static void xhtml_doheader(FILE * fp, word * title) +{ + fprintf(fp, + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"); + fprintf(fp, + "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"); + fprintf(fp, + "<html xmlns='http://www.w3.org/1999/xhtml'>\n\n<head>\n<title>"); + if (title == NULL) + fprintf(fp, "Documentation"); + else + xhtml_para(fp, title); + fprintf(fp, "</title>\n"); + fprintf(fp, + "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", + version); + if (conf.author) + fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author); + if (conf.description) + fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", + conf.description); + if (conf.head_end) + fprintf(fp, "%ls\n", conf.head_end); + fprintf(fp, "</head>\n\n"); + if (conf.body) + fprintf(fp, "%ls\n", conf.body); + else + fprintf(fp, "<body>\n"); + if (conf.body_start) + fprintf(fp, "%ls\n", conf.body_start); +} + +static void chm_doheader(FILE * fp, word * title) +{ + fprintf(fp, "<HTML><BODY><UL><LI><OBJECT type=\"text/sitemap\"><param name=\"Name\" value=\""); + xhtml_para(fp, title); + fprintf(fp,"\"><param name=\"Local\" value=\"Contents.html\"></OBJECT></li>\n"); +} + +/* + * Output a footer for this XHTML file. 
+ */ +static void xhtml_dofooter(FILE * fp) +{ + fprintf(fp, "\n<hr />\n\n"); + if (conf.body_end) + fprintf(fp, "%ls\n", conf.body_end); + if (!conf.suppress_address) + { + fprintf(fp, "<address>\n"); + if (conf.address_start) + fprintf(fp, "%ls\n", conf.address_start); + /* Do the version ID */ + if (conf.include_version_id) + { + paragraph *p; + int started = 0; + for (p = sourceparas; p; p = p->next) + if (p->type == para_VersionID) + { + xhtml_versionid(fp, p->words, started); + started = 1; + } + } + if (conf.address_end) + fprintf(fp, "%ls\n", conf.address_end); + fprintf(fp, "</address>\n"); + } + fprintf(fp, "</body>\n\n</html>\n"); +} +static void chm_dofooter(FILE * fp) +{ + fprintf(fp, "</ul></BODY></HTML>\n"); +} + +/* + * Output the versionid paragraph. Typically this is a version control + * ID string (such as $Id...$ in RCS). + */ +static void xhtml_versionid(FILE * fp, word * text, int started) +{ + rdstringc t = { 0, 0, NULL }; + + rdaddc(&t, '['); /* FIXME: configurability */ + xhtml_rdaddwc(&t, text, NULL); + rdaddc(&t, ']'); /* FIXME: configurability */ + + if (started) + fprintf(fp, "<br>\n"); + fprintf(fp, "%s\n", t.text); + sfree(t.text); +} + +/* Is this an XHTML reserved character? */ +static int xhtml_reservedchar(int c) +{ + if (c == '&' || c == '<' || c == '>' || c == '"') + return TRUE; + else + return FALSE; +} + +/* + * Convert a wide string into valid XHTML: Anything outside ASCII will + * be fixed up as an entity. Currently we don't worry about constraining the + * encoded character set, which we should probably do at some point (we can + * still fix up and return FALSE - see the last comment here). We also don't + * currently + * + * Because this is only used for words, spaces are HARD spaces (any other + * spaces will be word_Whitespace not word_Normal). So they become + * Unless hard_spaces is FALSE, of course (code paragraphs break the above + * rule). 
+ * + * If `result' is non-NULL, mallocs the resulting string and stores a pointer to + * it in `*result'. If `result' is NULL, merely checks whether all + * characters in the string are feasible. + * + * Return is nonzero if all characters are OK. If not all + * characters are OK but `result' is non-NULL, a result _will_ + * still be generated! + */ +static int xhtml_convert(wchar_t * s, char **result, int hard_spaces) +{ + int doing = (result != 0); + int ok = TRUE; + char *p = NULL; + int plen = 0, psize = 0; + + for (; *s; s++) + { + wchar_t c = *s; + +#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); } + + if (((c == 32 && !hard_spaces) + || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) + { + /* Char is OK. */ + if (doing) + { + ensure_size(plen); + p[plen++] = (char) c; + } + } else + { + /* Char needs fixing up. */ + /* ok = FALSE; -- currently we never return FALSE; we + * might want to when considering a character set for the + * encoded document. + */ + if (doing) + { + if (c == 32) + { /* a space in a word is a hard space */ + ensure_size(plen + 7); /* includes space for the NUL, which is subsequently stomped on */ + sprintf(p + plen, " "); + plen += 6; + } else + { + switch (c) + { + case '&': + ensure_size(plen + 6); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf(p + plen, "&"); + break; + case '"': + ensure_size(plen + 7); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf(p + plen, """); + break; + case '<': + if (plen > 1 && *(s - 1) == '\\' && *(s - 2) == '\\') + { + ensure_size(--plen); + p[plen - 1] = (char) c; + p[plen] = 0; + } else + { + ensure_size(plen + 5); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf(p + plen, "<"); + } + break; + case '>': + if (plen > 1 && *(s - 1) == '\\' && *(s - 2) == '\\') + { + ensure_size(--plen); + p[plen - 1] = (char) c; + p[plen] = 0; + } else + { + ensure_size(plen + 5); 
/* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf(p + plen, ">"); + } + break; + default: + ensure_size(plen + 8); /* includes space for the NUL, which is subsequently stomped on */ + plen += sprintf(p + plen, "&#%04i;", (int) c); + break; + } + } + } + } + } + if (doing) + { + p = resize(p, plen + 1); + p[plen] = '\0'; + *result = p; + } + + return ok; +} + +/* + * This formats the given words as XHTML. + */ +static void xhtml_rdaddwc(rdstringc * rs, word * text, word * end) +{ + char *c; + keyword *kwl; + xhtmlsection *sect; + indextag *itag; + int ti; + wchar_t *s; + + for (; text && text != end; text = text->next) + { + switch (text->type) + { + case word_HyperLink: + xhtml_utostr(text->text, &c); + rdaddsc(rs, "<a href=\""); + if(chm_toc && *c == '.' && *(c+1) == '.') + rdaddsc(rs, c + 1); + else + rdaddsc(rs, c); + rdaddsc(rs, "\">"); + sfree(c); + break; + + case word_LocalHyperLink: + xhtml_utostr(text->text, &c); + rdaddsc(rs, "<a href=\""); + if (conf.rlink_prefix) + { + char *c2; + xhtml_utostr(conf.rlink_prefix, &c2); + rdaddsc(rs, c2); + sfree(c2); + } + rdaddsc(rs, c); + if (conf.rlink_suffix) + { + char *c2; + xhtml_utostr(conf.rlink_suffix, &c2); + rdaddsc(rs, c2); + sfree(c2); + } + rdaddsc(rs, "\">"); + sfree(c); + break; + + case word_UpperXref: + case word_LowerXref: + case word_FreeTextXref: + kwl = kw_lookup(keywords, text->text); + if (kwl) + { + sect = xhtml_find_section(kwl->para); + if (sect) + { + rdaddsc(rs, "<a href=\""); + rdaddsc(rs, sect->file->filename); + rdaddc(rs, '#'); + rdaddsc(rs, sect->fragment); + rdaddsc(rs, "\">"); + } else + { + rdaddsc(rs, + "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->"); + error(err_whatever, + "Couldn't locate cross-reference! (Probably a bibliography entry.)"); + } + } else + { + rdaddsc(rs, + "<a href=\"Apologies.html\"><!-- unknown cross-reference -->"); + error(err_whatever, + "Couldn't locate cross-reference! 
(Wasn't in source file.)"); + } + break; + + case word_IndexRef: /* in theory we could make an index target here */ +/* rdaddsc(rs, "<a name=\"idx-"); + xhtml_utostr(text->text, &c); + rdaddsc(rs, c); + sfree(c); + rdaddsc(rs, "\"></a>");*/ + /* what we _do_ need to do is to fix up the backend data + * for any indexentry this points to. + */ + for (ti = 0; + (itag = (indextag *) index234(idx->tags, ti)) != NULL; ti++) + { + /* FIXME: really ustricmp() and not ustrcmp()? */ + if (ustricmp(itag->name, text->text) == 0) + { + break; + } + } + if (itag != NULL) + { + if (itag->refs != NULL) + { + int i; + for (i = 0; i < itag->nrefs; i++) + { + xhtmlindex *idx_ref; + indexentry *ientry; + + ientry = itag->refs[i]; + if (ientry->backend_data == NULL) + { + idx_ref = (xhtmlindex *) smalloc(sizeof(xhtmlindex)); + if (idx_ref == NULL) + fatal(err_nomemory); + idx_ref->nsection = 0; + idx_ref->size = 4; + idx_ref->sections = + (xhtmlsection **) smalloc(idx_ref->size * + sizeof(xhtmlsection *)); + if (idx_ref->sections == NULL) + fatal(err_nomemory); + ientry->backend_data = idx_ref; + } else + { + idx_ref = ientry->backend_data; + if (idx_ref->nsection + 1 > idx_ref->size) + { + int new_size = idx_ref->size * 2; + idx_ref->sections = + srealloc(idx_ref->sections, + new_size * sizeof(xhtmlsection)); + if (idx_ref->sections == NULL) + { + fatal(err_nomemory); + } + idx_ref->size = new_size; + } + } + idx_ref->sections[idx_ref->nsection++] = currentsection; +#if 0 +#endif + } + } else + { + fatal(err_whatever, "Index tag had no entries!"); + } + } else + { + fprintf(stderr, "Looking for index entry '%ls'\n", text->text); + fatal(err_whatever, + "Couldn't locate index entry! 
(Wasn't in index.)"); + } + break; + + case word_HyperEnd: + case word_XrefEnd: + rdaddsc(rs, "</a>"); + break; + + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + assert(text->type != word_CodeQuote && + text->type != word_WkCodeQuote); + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_First || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "<em>"); + else if ((towordstyle(text->type) == word_Code + || towordstyle(text->type) == word_WeakCode) + && (attraux(text->aux) == attr_First + || attraux(text->aux) == attr_Only)) + rdaddsc(rs, "<code>"); + + if (removeattr(text->type) == word_Normal) + { + static int dont_convert = 0; + if (dont_convert) + { + char buf[2] = " "; + dont_convert = 0; + s = text->text; + for (; *s; s++) + { + buf[0] = (char) *s; + rdaddsc(rs, buf); + } + buf[0] = 0; + rdaddsc(rs, buf); + } else + { + if (*text->text == '\\' && text->next + && text->next->text && (*text->next->text == '&' + || *text->next->text == '<' + || *text->next->text == '>' + || *text->next->text == '"')) + dont_convert = 1; + else + { + if (xhtml_convert(text->text, &c, TRUE)) /* spaces in the word are hard */ + rdaddsc(rs, c); + else + xhtml_rdaddwc(rs, text->alt, NULL); + sfree(c); + } + } + } else if (removeattr(text->type) == word_WhiteSpace) + { + rdaddc(rs, ' '); + } else if (removeattr(text->type) == word_Quote) + { + rdaddsc(rs, """); + } + + if (towordstyle(text->type) == word_Emph && + (attraux(text->aux) == attr_Last || + attraux(text->aux) == attr_Only)) + rdaddsc(rs, "</em>"); + else if ((towordstyle(text->type) == word_Code + || towordstyle(text->type) == word_WeakCode) + && (attraux(text->aux) == attr_Last + || attraux(text->aux) == attr_Only)) + rdaddsc(rs, "</code>"); + break; + } + } +} + +/* Output a heading, 
formatted as XHTML. + */ +static void xhtml_heading(FILE * fp, paragraph * p) +{ + rdstringc t = { 0, 0, NULL }; + word *tprefix = p->kwtext; + word *nprefix = p->kwtext2; + word *text = p->words; + int level = xhtml_para_level(p); + xhtmlsection *sect = xhtml_find_section(p); + xhtmlheadfmt *fmt; + char *fragment; + if (sect) + { + fragment = sect->fragment; + } else + { + if (p->type == para_Title) + fragment = "title"; + else + { + fragment = ""; /* FIXME: what else can we do? */ + error(err_whatever, "Couldn't locate heading cross-reference!"); + } + } + + if (p->type == para_Title) + fmt = NULL; + else if (level == 1) + fmt = &conf.fchapter; + else if (level - 1 < conf.nfsect) + fmt = &conf.fsect[level - 1]; + else + fmt = &conf.fsect[conf.nfsect - 1]; + + if (fmt && fmt->just_numbers && nprefix) + { + xhtml_rdaddwc(&t, nprefix, NULL); + if (fmt) + { + char *c; + if (xhtml_convert(fmt->number_suffix, &c, FALSE)) + { + rdaddsc(&t, c); + sfree(c); + } + } + } else if (fmt && !fmt->just_numbers && tprefix) + { + xhtml_rdaddwc(&t, tprefix, NULL); + if (fmt) + { + char *c; + if (xhtml_convert(fmt->number_suffix, &c, FALSE)) + { + rdaddsc(&t, c); + sfree(c); + } + } + } + xhtml_rdaddwc(&t, text, NULL); + /* + * If we're outputting in single-file mode, we need to lower + * the level of each heading by one, because the overall + * document title will be sitting right at the top as an <h1> + * and so chapters and sections should start at <h2>. + * + * Even if not, the document title will come back from + * xhtml_para_level() as level zero, so we must increment that + * no matter what leaf_level is set to. + */ + if (conf.leaf_level == 0 || level == 0) + level++; + fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, + t.text, level); + sfree(t.text); +} + +/* Output a paragraph. Styles are handled by xhtml_rdaddwc(). + * This looks pretty simple; I may have missed something ... 
+ */ +static void xhtml_para(FILE * fp, word * text) +{ + rdstringc out = { 0, 0, NULL }; + xhtml_rdaddwc(&out, text, NULL); + fprintf(fp, "%s", out.text); + sfree(out.text); +} + +/* Output a code paragraph. I'm treating this as preformatted, which + * may not be entirely correct. See xhtml_para() for my worries about + * this being overly-simple; however I think that most of the complexity + * of the text backend came entirely out of word wrapping anyway. + */ +static void xhtml_codepara(FILE * fp, word * text) +{ + fprintf(fp, "<pre>"); + for (; text; text = text->next) + if (text->type == word_WeakCode) + { + char *c; + xhtml_convert(text->text, &c, FALSE); + fprintf(fp, "%s\n", c); + sfree(c); + } + fprintf(fp, "</pre>\n"); +} diff --git a/Docs/src/bin/halibut/contents.c b/Docs/src/bin/halibut/contents.c index 28bc1f6..27215fe 100755 --- a/Docs/src/bin/halibut/contents.c +++ b/Docs/src/bin/halibut/contents.c @@ -1,246 +1,246 @@ -/*
- * contents.c: build a table of contents
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <limits.h>
-#include <string.h>
-#include "halibut.h"
-
-struct numberstate_Tag { /* running numbering state used by number_mktext() */
- int chapternum; /* last chapter number issued; incremented per para_Chapter */
- int appendixnum; /* last appendix index issued; starts at -1, rendered as letters */
- int ischapter; /* 1 after a chapter, 0 after an appendix: selects the heading prefix */
- int *sectionlevels; /* per-depth heading counters, maxsectlevel entries */
- paragraph **currentsects; /* latest heading seen at each depth, indexed by level + 1 */
- paragraph *lastsect; /* most recent heading; parent of non-heading paragraphs */
- int oklevel; /* deepest heading level currently permitted; -1 before any chapter */
- int maxsectlevel; /* allocated length of sectionlevels */
- int listitem; /* counter for the numbered list currently being walked */
- wchar_t *chaptertext; /* the word for a chapter */
- wchar_t *sectiontext; /* the word for a section */
- wchar_t *apptext; /* the word for an appendix */
-};
-
-/*
- * Allocate a fresh numbering state: no chapter or appendix seen yet, no
- * heading level permitted yet, and every "current section" slot empty.
- * The three *text fields are left for number_cfg() to fill in.
- */
-numberstate *number_init(void)
-{
-    const int initial_levels = 32;          /* heading depths tracked at first */
-    const int nslots = initial_levels + 1;  /* currentsects is indexed by level + 1 */
-    numberstate *state = mknew(numberstate);
-
-    /* Counters: the appendix index starts at -1 so the first one becomes 0 ('A'). */
-    state->chapternum = 0;
-    state->appendixnum = -1;
-    state->ischapter = 1;
-    state->oklevel = -1;                    /* not even in a chapter yet */
-
-    /* Per-level tables. */
-    state->maxsectlevel = initial_levels;
-    state->sectionlevels = mknewa(int, state->maxsectlevel);
-    state->currentsects = mknewa(paragraph *, nslots);
-    memset(state->currentsects, 0, nslots * sizeof(paragraph *));
-
-    state->lastsect = NULL;
-    state->listitem = -1;
-    return state;
-}
-
-/*
- * Release a numbering state created by number_init(), including both
- * per-level tables. Passing NULL is a harmless no-op, mirroring free().
- */
-void number_free(numberstate * state)
-{
-    if (state == NULL)
-        return;
-    sfree(state->sectionlevels);
-    sfree(state->currentsects);
-    sfree(state);
-}
-
-/*
- * Append a word_Normal word holding a copy of `text' to the list whose
- * tail link is **wret, then advance *wret to the new word's `next' link
- * so that the following append lands after it.
- */
-static void dotext(word *** wret, wchar_t * text)
-{
-    word *node = mknew(word);
-    word **tail;
-
-    node->text = ustrdup(text);
-    node->type = word_Normal;
-    node->alt = NULL;
-    node->next = NULL;
-
-    tail = *wret;
-    *tail = node;
-    *wret = &node->next;
-}
-
-/*
- * Append a word_WhiteSpace word (no text) to the list whose tail link is
- * **wret, and advance *wret to the new word's `next' link.
- */
-static void dospace(word *** wret)
-{
-    word *node = mknew(word);
-    word **tail;
-
-    node->text = NULL;
-    node->type = word_WhiteSpace;
-    node->alt = NULL;
-    node->next = NULL;
-
-    tail = *wret;
-    *tail = node;
-    *wret = &node->next;
-}
-
-/*
- * Append the decimal representation of `num' as a single word_Normal word
- * (via dotext()).
- *
- * Zero is rendered as "0" rather than as an empty string, and negative
- * values get a leading '-'; the digits are taken from the unsigned
- * magnitude so the digit table is never indexed with a negative remainder.
- */
-static void donumber(word *** wret, int num)
-{
-    wchar_t text[20];
-    int i = 19;
-    /* 0u - x is well defined for every int, including INT_MIN. */
-    unsigned int mag = num < 0 ? 0u - (unsigned int) num : (unsigned int) num;
-
-    text[i] = L'\0';
-    do {
-        assert(i > 0);          /* room left for one more character */
-        i--;
-        text[i] = L"0123456789"[mag % 10];
-        mag /= 10;
-    } while (mag != 0);
-    if (num < 0)
-    {
-        assert(i > 0);
-        i--;
-        text[i] = L'-';
-    }
-    dotext(wret, &text[i]);
-}
-
-/*
- * Append `num' rendered in letters as a single word_Normal word:
- * 0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", and so on.
- *
- * `num' must be non-negative; a negative value would index the letter
- * table out of bounds, so it is rejected by assertion.
- */
-static void doanumber(word *** wret, int num)
-{
-    wchar_t text[20];
-    int i = 19;
-    int nletters = 1;   /* how many letters the result needs */
-    int aton = 25;      /* largest offset expressible with nletters letters */
-
-    assert(num >= 0);
-
-    /*
-     * Strip off the ranges covered by shorter strings until `num' is an
-     * offset within the range of nletters-letter strings.
-     */
-    while (num > aton)
-    {
-        nletters++;
-        num -= aton + 1;
-        if (aton < INT_MAX / 26)
-            aton = (aton + 1) * 26 - 1;
-        else
-            aton = INT_MAX;     /* saturate instead of overflowing */
-    }
-
-    /* Emit exactly nletters base-26 digits, least significant first. */
-    text[i] = L'\0';
-    while (nletters--)
-    {
-        assert(i > 0);          /* room left for one more character */
-        i--;
-        text[i] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26];
-        num /= 26;
-    }
-    dotext(wret, &text[i]);
-}
-
-/*
- * Set the words used for chapter, section and appendix headings.
- * English defaults are installed first; each para_Config paragraph in
- * `source' whose keyword matches "chapter", "section" or "appendix"
- * (compared with ustricmp) then overrides the corresponding word with
- * uadv(keyword) -- presumably the string stored after the keyword;
- * TODO confirm against uadv().
- */
-void number_cfg(numberstate * state, paragraph * source)
-{
-    paragraph *para;
-
-    /* Defaults */
-    state->chaptertext = L"Chapter";
-    state->sectiontext = L"Section";
-    state->apptext = L"Appendix";
-
-    for (para = source; para != NULL; para = para->next)
-    {
-        if (para->type != para_Config)
-            continue;
-
-        if (ustricmp(para->keyword, L"chapter") == 0)
-            state->chaptertext = uadv(para->keyword);
-        else if (ustricmp(para->keyword, L"section") == 0)
-            state->sectiontext = uadv(para->keyword);
-        else if (ustricmp(para->keyword, L"appendix") == 0)
-            state->apptext = uadv(para->keyword);
-    }
-}
-
-/*
- * Make sure both per-level tables can be indexed for heading depth
- * `level': sectionlevels[level] and currentsects[level + 1]. Newly added
- * entries are cleared, so counters start from zero and section slots
- * start empty.
- */
-static void number_grow_levels(numberstate * state, int level)
-{
-    int oldmax = state->maxsectlevel;
-    int i;
-
-    if (level < oldmax)
-        return;
-
-    state->maxsectlevel = level + 32;
-    state->sectionlevels = resize(state->sectionlevels,
-                                  state->maxsectlevel);
-    /* currentsects always carries one slot more than sectionlevels. */
-    state->currentsects = resize(state->currentsects,
-                                 state->maxsectlevel + 1);
-    for (i = oldmax; i < state->maxsectlevel; i++)
-        state->sectionlevels[i] = 0;
-    for (i = oldmax + 1; i < state->maxsectlevel + 1; i++)
-        state->currentsects[i] = NULL;
-}
-
-/*
- * Build the numbering text for paragraph `p' and link it into the
- * section hierarchy.
- *
- * Returns a word list: "<category> <number>" for chapters, appendices and
- * headings (`category' overrides the configured word when non-NULL), the
- * bare item number for numbered-list paragraphs, and NULL otherwise.
- * p->kwtext2 is set to the part of that list starting at the number.
- *
- * `prev' is the type of the preceding paragraph; a numbered list restarts
- * at 1 whenever the previous paragraph was not a list item. If a heading
- * is deeper than currently permitted, err_sectjump is reported, *errflag
- * is set to TRUE and NULL is returned.
- *
- * p->parent, p->child and p->sibling are always reset and then rebuilt.
- */
-word *number_mktext(numberstate * state, paragraph * p, wchar_t * category,
-                    int prev, int *errflag)
-{
-    word *ret = NULL;
-    word **ret2 = &ret;
-    word **pret = &ret;
-    int i, level;
-
-    level = -2;                 /* default for non-section-heading */
-    switch (p->type)
-    {
-    case para_Chapter:
-        state->chapternum++;
-        for (i = 0; i < state->maxsectlevel; i++)
-            state->sectionlevels[i] = 0;
-        dotext(&pret, category ? category : state->chaptertext);
-        dospace(&pret);
-        ret2 = pret;
-        donumber(&pret, state->chapternum);
-        state->ischapter = 1;
-        state->oklevel = 0;
-        level = -1;
-        break;
-    case para_Heading:
-    case para_Subsect:
-        level = (p->type == para_Heading ? 0 : p->aux);
-        /*
-         * Grow the tables before anything is indexed by `level'. This is
-         * done ahead of the jump check because the link set-up below
-         * still indexes currentsects[level + 1] on the error path.
-         */
-        number_grow_levels(state, level);
-        if (level > state->oklevel)
-        {
-            error(err_sectjump, &p->fpos);
-            *errflag = TRUE;
-            ret = NULL;
-            break;
-        }
-        state->oklevel = level + 1;
-        state->sectionlevels[level]++;
-        for (i = level + 1; i < state->maxsectlevel; i++)
-            state->sectionlevels[i] = 0;
-        dotext(&pret, category ? category : state->sectiontext);
-        dospace(&pret);
-        ret2 = pret;
-        if (state->ischapter)
-            donumber(&pret, state->chapternum);
-        else
-            doanumber(&pret, state->appendixnum);
-        for (i = 0; i <= level; i++)
-        {
-            dotext(&pret, L".");
-            /* A skipped intermediate level is numbered 1, not 0. */
-            if (state->sectionlevels[i] == 0)
-                state->sectionlevels[i] = 1;
-            donumber(&pret, state->sectionlevels[i]);
-        }
-        break;
-    case para_Appendix:
-        state->appendixnum++;
-        for (i = 0; i < state->maxsectlevel; i++)
-            state->sectionlevels[i] = 0;
-        dotext(&pret, category ? category : state->apptext);
-        dospace(&pret);
-        ret2 = pret;
-        doanumber(&pret, state->appendixnum);
-        state->ischapter = 0;
-        state->oklevel = 0;
-        level = -1;
-        break;
-    case para_UnnumberedChapter:
-        level = -1;
-        break;
-    case para_NumberedList:
-        ret2 = pret;
-        if (prev != para_NumberedList)
-            state->listitem = 0;
-        state->listitem++;
-        donumber(&pret, state->listitem);
-        break;
-    default:
-        /* Every other paragraph type carries no number. */
-        break;
-    }
-
-    /*
-     * Now set up parent, child and sibling links.
-     */
-    p->parent = p->child = p->sibling = NULL;
-    if (level != -2)
-    {
-        /* The previous heading at this depth gains p as its sibling. */
-        if (state->currentsects[level + 1])
-            state->currentsects[level + 1]->sibling = p;
-        /* The enclosing heading becomes p's parent, and p its first child. */
-        if (level >= 0 && state->currentsects[level])
-        {
-            p->parent = state->currentsects[level];
-            if (!state->currentsects[level]->child)
-                state->currentsects[level]->child = p;
-        }
-        state->currentsects[level + 1] = state->lastsect = p;
-        /* Anything deeper belongs to an earlier heading: forget it. */
-        for (i = level + 2; i < state->maxsectlevel + 1; i++)
-            state->currentsects[i] = NULL;
-    } else
-    {
-        p->parent = state->lastsect;
-    }
-
-    p->kwtext2 = *ret2;
-    return ret;
-}
+/* + * contents.c: build a table of contents + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <limits.h> +#include <string.h> +#include "halibut.h" + +struct numberstate_Tag { + int chapternum; + int appendixnum; + int ischapter; + int *sectionlevels; + paragraph **currentsects; + paragraph *lastsect; + int oklevel; + int maxsectlevel; + int listitem; + wchar_t *chaptertext; /* the word for a chapter */ + wchar_t *sectiontext; /* the word for a section */ + wchar_t *apptext; /* the word for an appendix */ +}; + +numberstate *number_init(void) +{ + numberstate *ret = mknew(numberstate); + ret->chapternum = 0; + ret->appendixnum = -1; + ret->ischapter = 1; + ret->oklevel = -1; /* not even in a chapter yet */ + ret->maxsectlevel = 32; + ret->sectionlevels = mknewa(int, ret->maxsectlevel); + ret->currentsects = mknewa(paragraph *, ret->maxsectlevel + 1); + memset(ret->currentsects, 0, + (ret->maxsectlevel + 1) * sizeof(paragraph *)); + ret->lastsect = NULL; + ret->listitem = -1; + return ret; +} + +void number_free(numberstate * state) +{ + sfree(state->sectionlevels); + sfree(state->currentsects); + sfree(state); +} + +static void dotext(word *** wret, wchar_t * text) +{ + word *mnewword = mknew(word); + mnewword->text = ustrdup(text); + mnewword->type = word_Normal; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void dospace(word *** wret) +{ + word *mnewword = mknew(word); + mnewword->text = NULL; + mnewword->type = word_WhiteSpace; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void donumber(word *** wret, int num) +{ + wchar_t text[20]; + int i = 19; + text[i] = L'\0'; + while (num != 0) + { + assert(i >= 0); + i--; text[i] = L"0123456789"[num % 10]; + num /= 10; + } + dotext(wret, &text[i]); +} + +static void doanumber(word *** wret, int num) +{ + wchar_t text[20]; + int i = 19; + int nletters, aton; + 
nletters = 1; + aton = 25; + while (num > aton) + { + nletters++; + num -= aton + 1; + if (aton < INT_MAX / 26) + aton = (aton + 1) * 26 - 1; + else + aton = INT_MAX; + } + text[i] = L'\0'; + while (nletters--) + { + assert(i >= 0); + i--; text[i] = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26]; + num /= 26; + } + dotext(wret, &text[i]); +} + +void number_cfg(numberstate * state, paragraph * source) +{ + /* + * Defaults + */ + state->chaptertext = L"Chapter"; + state->sectiontext = L"Section"; + state->apptext = L"Appendix"; + + for (; source; source = source->next) + { + if (source->type == para_Config) + { + if (!ustricmp(source->keyword, L"chapter")) + { + state->chaptertext = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"section")) + { + state->sectiontext = uadv(source->keyword); + } else if (!ustricmp(source->keyword, L"appendix")) + { + state->apptext = uadv(source->keyword); + } + } + } +} + +word *number_mktext(numberstate * state, paragraph * p, wchar_t * category, + int prev, int *errflag) +{ + word *ret = NULL; + word **ret2 = &ret; + word **pret = &ret; + int i, level; + + level = -2; /* default for non-section-heading */ + switch (p->type) + { + case para_Chapter: + state->chapternum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->chaptertext); + dospace(&pret); + ret2 = pret; + donumber(&pret, state->chapternum); + state->ischapter = 1; + state->oklevel = 0; + level = -1; + break; + case para_Heading: + case para_Subsect: + level = (p->type == para_Heading ? 
0 : p->aux); + if (level > state->oklevel) + { + error(err_sectjump, &p->fpos); + *errflag = TRUE; + ret = NULL; + break; + } + state->oklevel = level + 1; + if (state->maxsectlevel <= level) + { + state->maxsectlevel = level + 32; + state->sectionlevels = resize(state->sectionlevels, + state->maxsectlevel); + } + state->sectionlevels[level]++; + for (i = level + 1; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->sectiontext); + dospace(&pret); + ret2 = pret; + if (state->ischapter) + donumber(&pret, state->chapternum); + else + doanumber(&pret, state->appendixnum); + for (i = 0; i <= level; i++) + { + dotext(&pret, L"."); + if (state->sectionlevels[i] == 0) + state->sectionlevels[i] = 1; + donumber(&pret, state->sectionlevels[i]); + } + break; + case para_Appendix: + state->appendixnum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, category ? category : state->apptext); + dospace(&pret); + ret2 = pret; + doanumber(&pret, state->appendixnum); + state->ischapter = 0; + state->oklevel = 0; + level = -1; + break; + case para_UnnumberedChapter: + level = -1; + break; + case para_NumberedList: + ret2 = pret; + if (prev != para_NumberedList) + state->listitem = 0; + state->listitem++; + donumber(&pret, state->listitem); + break; + } + + /* + * Now set up parent, child and sibling links. 
+ */ + p->parent = p->child = p->sibling = NULL; + if (level != -2) + { + if (state->currentsects[level + 1]) + state->currentsects[level + 1]->sibling = p; + if (level >= 0 && state->currentsects[level]) + { + p->parent = state->currentsects[level]; + if (!state->currentsects[level]->child) + state->currentsects[level]->child = p; + } + state->currentsects[level + 1] = state->lastsect = p; + for (i = level + 2; i < state->maxsectlevel + 1; i++) + state->currentsects[i] = NULL; + } else + { + p->parent = state->lastsect; + } + + p->kwtext2 = *ret2; + return ret; +} diff --git a/Docs/src/bin/halibut/error.c b/Docs/src/bin/halibut/error.c index 9d45e82..8c08e3a 100755 --- a/Docs/src/bin/halibut/error.c +++ b/Docs/src/bin/halibut/error.c @@ -1,225 +1,225 @@ -/*
- * error.c: Halibut error handling
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include "halibut.h"
-
-/*
- * Error flags
- */
-#define PREFIX 0x0001 /* give `halibut:' prefix */
-#define FILEPOS 0x0002 /* give file position prefix */
-
-/*
- * Format and print one diagnostic on stderr.
- *
- * `code' selects the message and dictates which arguments are pulled from
- * `ap'. Note that the argument order differs between codes:
- * err_badparatype and err_badmidcmd take (wchar_t *, filepos *), whereas
- * the other position-carrying codes take the filepos * first.
- * err_whatever takes a printf-style format followed by its arguments.
- *
- * Messages flagged PREFIX are preceded by "halibut: "; messages flagged
- * FILEPOS are preceded by "file:line:" (plus the column when positive).
- *
- * Every caller-supplied string is inserted with a "%.200s" bound so that
- * the fixed-size message buffer cannot overflow. The exception is
- * err_whatever: vsprintf() cannot be bounded, so callers must keep the
- * expanded message well under 1024 bytes.
- */
-static void do_error(int code, va_list ap)
-{
-    char error[1024];
-    char auxbuf[256];
-    char *sp, *sp2;
-    wchar_t *wsp;
-    filepos fpos, fpos2;
-    int flags = 0;
-
-    switch (code)
-    {
-    case err_nomemory:          /* no arguments */
-        sprintf(error, "out of memory");
-        flags = PREFIX;
-        break;
-    case err_optnoarg:
-        sp = va_arg(ap, char *);
-        sprintf(error, "option `-%.200s' requires an argument", sp);
-        flags = PREFIX;
-        break;
-    case err_nosuchopt:
-        sp = va_arg(ap, char *);
-        sprintf(error, "unrecognised option `-%.200s'", sp);
-        flags = PREFIX;
-        break;
-    case err_noinput:           /* no arguments */
-        sprintf(error, "no input files");
-        flags = PREFIX;
-        break;
-    case err_cantopen:
-        sp = va_arg(ap, char *);
-        sprintf(error, "unable to open input file `%.200s'", sp);
-        flags = PREFIX;
-        break;
-    case err_nodata:            /* no arguments */
-        sprintf(error, "no data in input files");
-        flags = PREFIX;
-        break;
-    case err_brokencodepara:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "every line of a code paragraph should begin `\\c'");
-        flags = FILEPOS;
-        break;
-    case err_kwunclosed:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected `}' after paragraph keyword");
-        flags = FILEPOS;
-        break;
-    case err_kwexpected:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected a paragraph keyword");
-        flags = FILEPOS;
-        break;
-    case err_kwillegal:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected no paragraph keyword");
-        flags = FILEPOS;
-        break;
-    case err_kwtoomany:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected only one paragraph keyword");
-        flags = FILEPOS;
-        break;
-    case err_bodyillegal:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected no text after paragraph keyword");
-        flags = FILEPOS;
-        break;
-    case err_badparatype:
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "command `%.200s' unrecognised at start of"
-                " paragraph", sp);
-        flags = FILEPOS;
-        break;
-    case err_badmidcmd:
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
-        flags = FILEPOS;
-        break;
-    case err_unexbrace:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "brace character unexpected in mid-paragraph");
-        flags = FILEPOS;
-        break;
-    case err_explbr:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected `{' after command");
-        flags = FILEPOS;
-        break;
-    case err_commenteof:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "end of file unexpected inside `\\#{...}' comment");
-        flags = FILEPOS;
-        break;
-    case err_kwexprbr:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected `}' after cross-reference");
-        flags = FILEPOS;
-        break;
-    case err_missingrbrace:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "unclosed braces at end of paragraph");
-        flags = FILEPOS;
-        break;
-    case err_nestedstyles:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "unable to nest text styles");
-        flags = FILEPOS;
-        break;
-    case err_nestedindex:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "unable to nest index markings");
-        flags = FILEPOS;
-        break;
-    case err_nosuchkw:
-        fpos = *va_arg(ap, filepos *);
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
-        flags = FILEPOS;
-        break;
-    case err_multiBR:
-        fpos = *va_arg(ap, filepos *);
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
-        flags = FILEPOS;
-        break;
-    case err_nosuchidxtag:
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
-        flags = 0;
-        /* FIXME: need to get a filepos to here somehow */
-        break;
-    case err_cantopenw:
-        sp = va_arg(ap, char *);
-        sprintf(error, "unable to open output file `%.200s'", sp);
-        flags = PREFIX;
-        break;
-    case err_macroexists:
-        fpos = *va_arg(ap, filepos *);
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        sprintf(error, "macro `%.200s' already defined", sp);
-        flags = FILEPOS;
-        break;
-    case err_sectjump:
-        fpos = *va_arg(ap, filepos *);
-        sprintf(error, "expected higher heading levels before this one");
-        flags = FILEPOS;
-        break;
-    case err_winhelp_ctxclash:
-        fpos = *va_arg(ap, filepos *);
-        sp = va_arg(ap, char *);
-        sp2 = va_arg(ap, char *);
-        sprintf(error, "Windows Help context id `%.200s' clashes with "
-                "previously defined `%.200s'", sp, sp2);
-        flags = FILEPOS;
-        break;
-    case err_multikw:
-        fpos = *va_arg(ap, filepos *);
-        fpos2 = *va_arg(ap, filepos *);
-        wsp = va_arg(ap, wchar_t *);
-        sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
-        sprintf(error, "paragraph keyword `%.200s' already defined at ", sp);
-        /* Bound the file name like every other inserted string. */
-        sprintf(error + strlen(error), "%.200s:%d", fpos2.filename,
-                fpos2.line);
-        flags = FILEPOS;
-        break;
-    case err_whatever:
-        sp = va_arg(ap, char *);
-        /* NOTE: unbounded; the caller's format must expand to < 1024 bytes. */
-        vsprintf(error, sp, ap);
-        flags = PREFIX;
-        break;
-    default:
-        /* Never print an uninitialised buffer for an unrecognised code. */
-        sprintf(error, "unknown error code %d", code);
-        flags = PREFIX;
-        break;
-    }
-
-    if (flags & PREFIX)
-        fputs("halibut: ", stderr);
-    if (flags & FILEPOS)
-    {
-        fprintf(stderr, "%s:%d:", fpos.filename, fpos.line);
-        if (fpos.col > 0)
-            fprintf(stderr, "%d:", fpos.col);
-        fputc(' ', stderr);
-    }
-    fputs(error, stderr);
-    fputc('\n', stderr);
-}
-
-/*
- * Report the error identified by `code' (with its code-specific variadic
- * arguments, as consumed by do_error()) and terminate the program with
- * EXIT_FAILURE. Does not return.
- */
-void fatal(int code, ...)
-{
-    va_list args;
-
-    va_start(args, code);
-    do_error(code, args);
-    va_end(args);
-
-    exit(EXIT_FAILURE);
-}
-
-/*
- * Report the error identified by `code' (with its code-specific variadic
- * arguments, as consumed by do_error()) and return to the caller so that
- * processing can continue.
- */
-void error(int code, ...)
-{
-    va_list args;
-
-    va_start(args, code);
-    do_error(code, args);
-    va_end(args);
-}
+/* + * error.c: Halibut error handling + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include "halibut.h" + +/* + * Error flags + */ +#define PREFIX 0x0001 /* give `halibut:' prefix */ +#define FILEPOS 0x0002 /* give file position prefix */ + +static void do_error(int code, va_list ap) +{ + char error[1024]; + char auxbuf[256]; + char *sp, *sp2; + wchar_t *wsp; + filepos fpos, fpos2; + int flags=0; + + switch (code) + { + case err_nomemory: /* no arguments */ + sprintf(error, "out of memory"); + flags = PREFIX; + break; + case err_optnoarg: + sp = va_arg(ap, char *); + sprintf(error, "option `-%.200s' requires an argument", sp); + flags = PREFIX; + break; + case err_nosuchopt: + sp = va_arg(ap, char *); + sprintf(error, "unrecognised option `-%.200s'", sp); + flags = PREFIX; + break; + case err_noinput: /* no arguments */ + sprintf(error, "no input files"); + flags = PREFIX; + break; + case err_cantopen: + sp = va_arg(ap, char *); + sprintf(error, "unable to open input file `%.200s'", sp); + flags = PREFIX; + break; + case err_nodata: /* no arguments */ + sprintf(error, "no data in input files"); + flags = PREFIX; + break; + case err_brokencodepara: + fpos = *va_arg(ap, filepos *); + sprintf(error, "every line of a code paragraph should begin `\\c'"); + flags = FILEPOS; + break; + case err_kwunclosed: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected `}' after paragraph keyword"); + flags = FILEPOS; + break; + case err_kwexpected: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected a paragraph keyword"); + flags = FILEPOS; + break; + case err_kwillegal: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected no paragraph keyword"); + flags = FILEPOS; + break; + case err_kwtoomany: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected only one paragraph keyword"); + flags = FILEPOS; + break; + case err_bodyillegal: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected no text after 
paragraph keyword"); + flags = FILEPOS; + break; + case err_badparatype: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + fpos = *va_arg(ap, filepos *); + sprintf(error, "command `%.200s' unrecognised at start of" + " paragraph", sp); + flags = FILEPOS; + break; + case err_badmidcmd: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + fpos = *va_arg(ap, filepos *); + sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); + flags = FILEPOS; + break; + case err_unexbrace: + fpos = *va_arg(ap, filepos *); + sprintf(error, "brace character unexpected in mid-paragraph"); + flags = FILEPOS; + break; + case err_explbr: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected `{' after command"); + flags = FILEPOS; + break; + case err_commenteof: + fpos = *va_arg(ap, filepos *); + sprintf(error, "end of file unexpected inside `\\#{...}' comment"); + flags = FILEPOS; + break; + case err_kwexprbr: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected `}' after cross-reference"); + flags = FILEPOS; + break; + case err_missingrbrace: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unclosed braces at end of paragraph"); + flags = FILEPOS; + break; + case err_nestedstyles: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unable to nest text styles"); + flags = FILEPOS; + break; + case err_nestedindex: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unable to nest index markings"); + flags = FILEPOS; + break; + case err_nosuchkw: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); + flags = FILEPOS; + break; + case err_multiBR: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); + flags = FILEPOS; + break; + case err_nosuchidxtag: + wsp = 
va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); + flags = 0; + /* FIXME: need to get a filepos to here somehow */ + break; + case err_cantopenw: + sp = va_arg(ap, char *); + sprintf(error, "unable to open output file `%.200s'", sp); + flags = PREFIX; + break; + case err_macroexists: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "macro `%.200s' already defined", sp); + flags = FILEPOS; + break; + case err_sectjump: + fpos = *va_arg(ap, filepos *); + sprintf(error, "expected higher heading levels before this one"); + flags = FILEPOS; + break; + case err_winhelp_ctxclash: + fpos = *va_arg(ap, filepos *); + sp = va_arg(ap, char *); + sp2 = va_arg(ap, char *); + sprintf(error, "Windows Help context id `%.200s' clashes with " + "previously defined `%.200s'", sp, sp2); + flags = FILEPOS; + break; + case err_multikw: + fpos = *va_arg(ap, filepos *); + fpos2 = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); + sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); + flags = FILEPOS; + break; + case err_whatever: + sp = va_arg(ap, char *); + vsprintf(error, sp, ap); + flags = PREFIX; + break; + } + + if (flags & PREFIX) + fputs("halibut: ", stderr); + if (flags & FILEPOS) + { + fprintf(stderr, "%s:%d:", fpos.filename, fpos.line); + if (fpos.col > 0) + fprintf(stderr, "%d:", fpos.col); + fputc(' ', stderr); + } + fputs(error, stderr); + fputc('\n', stderr); +} + +void fatal(int code, ...) +{ + va_list ap; + va_start(ap, code); + do_error(code, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +void error(int code, ...) 
+{ + va_list ap; + va_start(ap, code); + do_error(code, ap); + va_end(ap); +} diff --git a/Docs/src/bin/halibut/halibut.h b/Docs/src/bin/halibut/halibut.h index 43cd7da..dfbbe2b 100755 --- a/Docs/src/bin/halibut/halibut.h +++ b/Docs/src/bin/halibut/halibut.h @@ -1,408 +1,408 @@ -#ifndef HALIBUT_HALIBUT_H
-#define HALIBUT_HALIBUT_H
-
-#include <stdio.h>
-#include <wchar.h>
-#include <time.h>
-
-#ifdef __GNUC__
-#define NORETURN __attribute__((__noreturn__))
-#else
-#define NORETURN /* nothing */
-#endif
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-/* For suppressing unused-parameter warnings */
-#define IGNORE(x) ( (x) = (x) )
-
-#include "tree234.h"
-
-/*
- * Structure tags
- */
-typedef struct input_Tag input;
-typedef struct filepos_Tag filepos;
-typedef struct paragraph_Tag paragraph;
-typedef struct word_Tag word;
-typedef struct keywordlist_Tag keywordlist;
-typedef struct keyword_Tag keyword;
-typedef struct userstyle_Tag userstyle;
-typedef struct numberstate_Tag numberstate;
-typedef struct indexdata_Tag indexdata;
-typedef struct indextag_Tag indextag;
-typedef struct indexentry_Tag indexentry;
-typedef struct macrostack_Tag macrostack;
-
-/*
- * Data structure to hold a file name and index, a line and a
- * column number, for reporting errors
- */
-struct filepos_Tag {
- char *filename;
- int line, col;
-};
-
-/*
- * Data structure to hold all the file names etc for input
- */
-typedef struct pushback_Tag {
- int chr;
- filepos pos;
-} pushback;
-struct input_Tag {
- char **filenames; /* complete list of input files */
- int nfiles; /* how many in the list */
- FILE *currfp; /* the currently open one */
- int currindex; /* which one is that in the list */
- pushback *pushback; /* pushed-back input characters */
- int npushback, pushbacksize;
- filepos pos;
- int reportcols; /* report column numbers in errors */
- macrostack *stack; /* macro expansions in force */
-};
-
-/*
- * Data structure to hold the input form of the source, ie a linked
- * list of paragraphs
- */
-struct paragraph_Tag {
- paragraph *next;
- int type;
- wchar_t *keyword; /* for most special paragraphs */
- word *words; /* list of words in paragraph */
- int aux; /* number, in a numbered paragraph
- * or subsection level
- */
- word *kwtext; /* chapter/section indication */
- word *kwtext2; /* numeric-only form of kwtext */
- filepos fpos;
-
- paragraph *parent, *child, *sibling; /* for hierarchy navigation */
-
- void *private_data; /* for temp use in backends */
-};
-enum {
- para_IM, /* index merge */
- para_BR, /* bibliography rewrite */
- para_Rule, /* random horizontal rule */
- para_Chapter,
- para_Appendix,
- para_UnnumberedChapter,
- para_Heading,
- para_Subsect,
- para_Normal,
- para_Biblio, /* causes no output unless turned ... */
- para_BiblioCited, /* ... into this paragraph type */
- para_Bullet,
- para_NumberedList,
- para_Code,
- para_Copyright,
- para_Preamble,
- para_NoCite,
- para_Title,
- para_VersionID,
- para_Config, /* configuration directive */
- para_NotParaType /* placeholder value */
-};
-
-/*
- * Data structure to hold an individual word
- */
-struct word_Tag {
- word *next, *alt;
- int type;
- int aux;
- int breaks; /* can a line break after it? */
- wchar_t *text;
- filepos fpos;
-};
-enum {
- /* ORDERING CONSTRAINT: these normal-word types ... */
- word_Normal,
- word_Emph,
- word_Code, /* monospaced; `quoted' in text */
- word_WeakCode, /* monospaced, normal in text */
- /* ... must be in the same order as these space types ... */
- word_WhiteSpace, /* text is NULL or ignorable */
- word_EmphSpace, /* WhiteSpace when emphasised */
- word_CodeSpace, /* WhiteSpace when code */
- word_WkCodeSpace, /* WhiteSpace when weak code */
- /* ... and must be in the same order as these quote types ... */
- word_Quote, /* text is NULL or ignorable */
- word_EmphQuote, /* Quote when emphasised */
- word_CodeQuote, /* (can't happen) */
- word_WkCodeQuote, /* (can't happen) */
- /* END ORDERING CONSTRAINT */
- word_internal_endattrs,
- word_UpperXref, /* \K */
- word_LowerXref, /* \k */
- word_XrefEnd, /* (invisible; no text) */
- word_IndexRef, /* (always an invisible one) */
- word_HyperLink, /* (invisible) */
- word_HyperEnd, /* (also invisible; no text) */
- word_LocalHyperLink, /* (invisible) */
- word_FreeTextXref /* \R */
-};
-/* aux values for attributed words */
-enum {
- attr_Only = 0x0000, /* a lone word with the attribute */
- attr_First = 0x0001, /* the first of a series */
- attr_Last = 0x0002, /* the last of a series */
- attr_Always = 0x0003, /* any other part of a series */
- attr_mask = 0x0003,
-};
-/* aux values for quote-type words */
-enum {
- quote_Open = 0x0010,
- quote_Close = 0x0020,
- quote_mask = 0x0030,
-};
-#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \
- ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) )
-#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 )
-#define towordstyle(x) ( word_Normal + ((x) & 3) )
-#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) )
-#define toquotestyle(x) ( word_Quote + ((x) & 3) )
-#define removeattr(x) ( word_Normal + ((x) &~ 3) )
-
-#define attraux(x) ( (x) & attr_mask )
-#define quoteaux(x) ( (x) & quote_mask )
-
-/*
- * error.c
- */
-void fatal(int code, ...) NORETURN;
-void error(int code, ...);
-enum {
- err_nomemory, /* out of memory */
- err_optnoarg, /* option `-%s' requires an argument */
- err_nosuchopt, /* unrecognised option `-%s' */
- err_noinput, /* no input files */
- err_cantopen, /* unable to open input file `%s' */
- err_nodata, /* no data in input files */
- err_brokencodepara, /* line in codepara didn't begin `\c' */
- err_kwunclosed, /* expected `}' after keyword */
- err_kwillegal, /* paragraph type expects no keyword */
- err_kwexpected, /* paragraph type expects a keyword */
- err_kwtoomany, /* paragraph type expects only 1 */
- err_bodyillegal, /* paragraph type expects only kws! */
- err_badparatype, /* invalid command at start of para */
- err_badmidcmd, /* invalid command in mid-para */
- err_unexbrace, /* unexpected brace */
- err_explbr, /* expected `{' after command */
- err_commenteof, /* EOF inside braced comment */
- err_kwexprbr, /* expected `}' after cross-ref */
- err_missingrbrace, /* unclosed braces at end of para */
- err_nestedstyles, /* unable to nest text styles */
- err_nestedindex, /* unable to nest `\i' thingys */
- err_nosuchkw, /* unresolved cross-reference */
- err_multiBR, /* multiple \BRs on same keyword */
- err_nosuchidxtag, /* \IM on unknown index tag (warning) */
- err_cantopenw, /* can't open output file for write */
- err_macroexists, /* this macro already exists */
- err_sectjump, /* jump a heading level, eg \C -> \S */
- err_winhelp_ctxclash, /* WinHelp context ID hash clash */
- err_multikw, /* keyword clash in sections */
- err_whatever /* random error of another type */
-};
-
-/*
- * malloc.c
- */
-#ifdef LOGALLOC
-void *smalloc(char *file, int line, int size);
-void *srealloc(char *file, int line, void *p, int size);
-void sfree(char *file, int line, void *p);
-#define smalloc(x) smalloc(__FILE__, __LINE__, x)
-#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
-#define sfree(x) sfree(__FILE__, __LINE__, x)
-#else
-void *smalloc(int size);
-void *srealloc(void *p, int size);
-void sfree(void *p);
-#endif
-void free_word_list(word * w);
-void free_para_list(paragraph * p);
-word *dup_word_list(word * w);
-char *dupstr(char *s);
-
-#define mknew(type) ( (type *) smalloc (sizeof (type)) )
-#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) )
-#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) )
-#define lenof(array) ( sizeof(array) / sizeof(*(array)) )
-
-/*
- * ustring.c
- */
-wchar_t *ustrdup(wchar_t * s);
-char *ustrtoa(wchar_t * s, char *outbuf, int size);
-int ustrlen(wchar_t * s);
-wchar_t *uadv(wchar_t * s);
-wchar_t *ustrcpy(wchar_t * dest, wchar_t * source);
-wchar_t utolower(wchar_t);
-int ustrcmp(wchar_t * lhs, wchar_t * rhs);
-int ustricmp(wchar_t * lhs, wchar_t * rhs);
-int utoi(wchar_t *);
-int utob(wchar_t *);
-int uisdigit(wchar_t);
-wchar_t *ustrlow(wchar_t * s);
-wchar_t *ustrftime(wchar_t * fmt, struct tm *timespec);
-
-/*
- * help.c
- */
-void usage(void);
-void showversion(void);
-
-/*
- * licence.c
- */
-void licence(void);
-
-/*
- * version.c
- */
-const char *const version;
-
-/*
- * misc.c
- */
-typedef struct stackTag *stack;
-stack stk_new(void);
-void stk_free(stack);
-void stk_push(stack, void *);
-void *stk_pop(stack);
-
-typedef struct tagRdstring rdstring;
-struct tagRdstring {
- int pos, size;
- wchar_t *text;
-};
-typedef struct tagRdstringc rdstringc;
-struct tagRdstringc {
- int pos, size;
- char *text;
-};
-extern const rdstring empty_rdstring;
-extern const rdstringc empty_rdstringc;
-void rdadd(rdstring * rs, wchar_t c);
-void rdadds(rdstring * rs, wchar_t * p);
-wchar_t *rdtrim(rdstring * rs);
-void rdaddc(rdstringc * rs, char c);
-void rdaddsc(rdstringc * rs, char *p);
-char *rdtrimc(rdstringc * rs);
-
-int compare_wordlists(word * a, word * b);
-
-void mark_attr_ends(paragraph * sourceform);
-
-typedef struct tagWrappedLine wrappedline;
-struct tagWrappedLine {
- wrappedline *next;
- word *begin, *end; /* first & last words of line */
- int nspaces; /* number of whitespaces in line */
- int shortfall; /* how much shorter than max width */
-};
-wrappedline *wrap_para(word *, int, int, int (*)(word *));
-void wrap_free(wrappedline *);
-
-/*
- * input.c
- */
-paragraph *read_input(input * in, indexdata * idx);
-
-/*
- * keywords.c
- */
-struct keywordlist_Tag {
- int nkeywords;
- int size;
- tree234 *keys; /* sorted by `key' field */
- word **looseends; /* non-keyword list element numbers */
- int nlooseends;
- int looseendssize;
-};
-struct keyword_Tag {
- wchar_t *key; /* the keyword itself */
- word *text; /* "Chapter 2", "Appendix Q"... */
- /* (NB: filepos are not set) */
- paragraph *para; /* the paragraph referenced */
-};
-keyword *kw_lookup(keywordlist *, wchar_t *);
-keywordlist *get_keywords(paragraph *);
-void free_keywords(keywordlist *);
-void subst_keywords(paragraph *, keywordlist *);
-
-/*
- * index.c
- */
-
-/*
- * Data structure to hold both sides of the index.
- */
-struct indexdata_Tag {
- tree234 *tags; /* holds type `indextag' */
- tree234 *entries; /* holds type `indexentry' */
-};
-
-/*
- * Data structure to hold an index tag (LHS of index).
- */
-struct indextag_Tag {
- wchar_t *name;
- word *implicit_text;
- word **explicit_texts;
- int nexplicit, explicit_size;
- int nrefs;
- indexentry **refs; /* array of entries referenced by tag */
-};
-
-/*
- * Data structure to hold an index entry (RHS of index).
- */
-struct indexentry_Tag {
- word *text;
- void *backend_data; /* private to back end */
-};
-
-indexdata *make_index(void);
-void cleanup_index(indexdata *);
-/* index_merge takes responsibility for freeing arg 3 iff implicit; never
- * takes responsibility for arg 2 */
-void index_merge(indexdata *, int is_explicit, wchar_t *, word *);
-void build_index(indexdata *);
-void index_debug(indexdata *);
-indextag *index_findtag(indexdata * idx, wchar_t * name);
-
-/*
- * contents.c
- */
-numberstate *number_init(void);
-void number_cfg(numberstate *, paragraph *);
-word *number_mktext(numberstate *, paragraph *, wchar_t *, int, int *);
-void number_free(numberstate *);
-
-/*
- * biblio.c
- */
-void gen_citations(paragraph *, keywordlist *);
-
-/*
- * style.c
- */
-struct userstyle_Tag {
- void* empty;
-};
-
-/*
- * bk_xhtml.c
- */
-void xhtml_backend(paragraph *, keywordlist *, indexdata *);
-
-#endif
+#ifndef HALIBUT_HALIBUT_H +#define HALIBUT_HALIBUT_H + +#include <stdio.h> +#include <wchar.h> +#include <time.h> + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN /* nothing */ +#endif + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +/* For suppressing unused-parameter warnings */ +#define IGNORE(x) ( (x) = (x) ) + +#include "tree234.h" + +/* + * Structure tags + */ +typedef struct input_Tag input; +typedef struct filepos_Tag filepos; +typedef struct paragraph_Tag paragraph; +typedef struct word_Tag word; +typedef struct keywordlist_Tag keywordlist; +typedef struct keyword_Tag keyword; +typedef struct userstyle_Tag userstyle; +typedef struct numberstate_Tag numberstate; +typedef struct indexdata_Tag indexdata; +typedef struct indextag_Tag indextag; +typedef struct indexentry_Tag indexentry; +typedef struct macrostack_Tag macrostack; + +/* + * Data structure to hold a file name and index, a line and a + * column number, for reporting errors + */ +struct filepos_Tag { + char *filename; + int line, col; +}; + +/* + * Data structure to hold all the file names etc for input + */ +typedef struct pushback_Tag { + int chr; + filepos pos; +} pushback; +struct input_Tag { + char **filenames; /* complete list of input files */ + int nfiles; /* how many in the list */ + FILE *currfp; /* the currently open one */ + int currindex; /* which one is that in the list */ + pushback *pushback; /* pushed-back input characters */ + int npushback, pushbacksize; + filepos pos; + int reportcols; /* report column numbers in errors */ + macrostack *stack; /* macro expansions in force */ +}; + +/* + * Data structure to hold the input form of the source, ie a linked + * list of paragraphs + */ +struct paragraph_Tag { + paragraph *next; + int type; + wchar_t *keyword; /* for most special paragraphs */ + word *words; /* list of words in paragraph */ + int aux; /* number, in a numbered paragraph + * or subsection level 
+ */ + word *kwtext; /* chapter/section indication */ + word *kwtext2; /* numeric-only form of kwtext */ + filepos fpos; + + paragraph *parent, *child, *sibling; /* for hierarchy navigation */ + + void *private_data; /* for temp use in backends */ +}; +enum { + para_IM, /* index merge */ + para_BR, /* bibliography rewrite */ + para_Rule, /* random horizontal rule */ + para_Chapter, + para_Appendix, + para_UnnumberedChapter, + para_Heading, + para_Subsect, + para_Normal, + para_Biblio, /* causes no output unless turned ... */ + para_BiblioCited, /* ... into this paragraph type */ + para_Bullet, + para_NumberedList, + para_Code, + para_Copyright, + para_Preamble, + para_NoCite, + para_Title, + para_VersionID, + para_Config, /* configuration directive */ + para_NotParaType /* placeholder value */ +}; + +/* + * Data structure to hold an individual word + */ +struct word_Tag { + word *next, *alt; + int type; + int aux; + int breaks; /* can a line break after it? */ + wchar_t *text; + filepos fpos; +}; +enum { + /* ORDERING CONSTRAINT: these normal-word types ... */ + word_Normal, + word_Emph, + word_Code, /* monospaced; `quoted' in text */ + word_WeakCode, /* monospaced, normal in text */ + /* ... must be in the same order as these space types ... */ + word_WhiteSpace, /* text is NULL or ignorable */ + word_EmphSpace, /* WhiteSpace when emphasised */ + word_CodeSpace, /* WhiteSpace when code */ + word_WkCodeSpace, /* WhiteSpace when weak code */ + /* ... and must be in the same order as these quote types ... 
*/ + word_Quote, /* text is NULL or ignorable */ + word_EmphQuote, /* Quote when emphasised */ + word_CodeQuote, /* (can't happen) */ + word_WkCodeQuote, /* (can't happen) */ + /* END ORDERING CONSTRAINT */ + word_internal_endattrs, + word_UpperXref, /* \K */ + word_LowerXref, /* \k */ + word_XrefEnd, /* (invisible; no text) */ + word_IndexRef, /* (always an invisible one) */ + word_HyperLink, /* (invisible) */ + word_HyperEnd, /* (also invisible; no text) */ + word_LocalHyperLink, /* (invisible) */ + word_FreeTextXref /* \R */ +}; +/* aux values for attributed words */ +enum { + attr_Only = 0x0000, /* a lone word with the attribute */ + attr_First = 0x0001, /* the first of a series */ + attr_Last = 0x0002, /* the last of a series */ + attr_Always = 0x0003, /* any other part of a series */ + attr_mask = 0x0003, +}; +/* aux values for quote-type words */ +enum { + quote_Open = 0x0010, + quote_Close = 0x0020, + quote_mask = 0x0030, +}; +#define isattr(x) ( ( (x) > word_Normal && (x) < word_WhiteSpace ) || \ + ( (x) > word_WhiteSpace && (x) < word_internal_endattrs ) ) +#define sameattr(x,y) ( (((x)-(y)) & 3) == 0 ) +#define towordstyle(x) ( word_Normal + ((x) & 3) ) +#define tospacestyle(x) ( word_WhiteSpace + ((x) & 3) ) +#define toquotestyle(x) ( word_Quote + ((x) & 3) ) +#define removeattr(x) ( word_Normal + ((x) &~ 3) ) + +#define attraux(x) ( (x) & attr_mask ) +#define quoteaux(x) ( (x) & quote_mask ) + +/* + * error.c + */ +void fatal(int code, ...) 
NORETURN; +void error(int code, ...); +enum { + err_nomemory, /* out of memory */ + err_optnoarg, /* option `-%s' requires an argument */ + err_nosuchopt, /* unrecognised option `-%s' */ + err_noinput, /* no input files */ + err_cantopen, /* unable to open input file `%s' */ + err_nodata, /* no data in input files */ + err_brokencodepara, /* line in codepara didn't begin `\c' */ + err_kwunclosed, /* expected `}' after keyword */ + err_kwillegal, /* paragraph type expects no keyword */ + err_kwexpected, /* paragraph type expects a keyword */ + err_kwtoomany, /* paragraph type expects only 1 */ + err_bodyillegal, /* paragraph type expects only kws! */ + err_badparatype, /* invalid command at start of para */ + err_badmidcmd, /* invalid command in mid-para */ + err_unexbrace, /* unexpected brace */ + err_explbr, /* expected `{' after command */ + err_commenteof, /* EOF inside braced comment */ + err_kwexprbr, /* expected `}' after cross-ref */ + err_missingrbrace, /* unclosed braces at end of para */ + err_nestedstyles, /* unable to nest text styles */ + err_nestedindex, /* unable to nest `\i' thingys */ + err_nosuchkw, /* unresolved cross-reference */ + err_multiBR, /* multiple \BRs on same keyword */ + err_nosuchidxtag, /* \IM on unknown index tag (warning) */ + err_cantopenw, /* can't open output file for write */ + err_macroexists, /* this macro already exists */ + err_sectjump, /* jump a heading level, eg \C -> \S */ + err_winhelp_ctxclash, /* WinHelp context ID hash clash */ + err_multikw, /* keyword clash in sections */ + err_whatever /* random error of another type */ +}; + +/* + * malloc.c + */ +#ifdef LOGALLOC +void *smalloc(char *file, int line, int size); +void *srealloc(char *file, int line, void *p, int size); +void sfree(char *file, int line, void *p); +#define smalloc(x) smalloc(__FILE__, __LINE__, x) +#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) +#define sfree(x) sfree(__FILE__, __LINE__, x) +#else +void *smalloc(int size); +void 
*srealloc(void *p, int size); +void sfree(void *p); +#endif +void free_word_list(word * w); +void free_para_list(paragraph * p); +word *dup_word_list(word * w); +char *dupstr(char *s); + +#define mknew(type) ( (type *) smalloc (sizeof (type)) ) +#define mknewa(type, number) ( (type *) smalloc ((number) * sizeof (type)) ) +#define resize(array, len) ( srealloc ((array), (len) * sizeof (*(array))) ) +#define lenof(array) ( sizeof(array) / sizeof(*(array)) ) + +/* + * ustring.c + */ +wchar_t *ustrdup(wchar_t * s); +char *ustrtoa(wchar_t * s, char *outbuf, int size); +int ustrlen(wchar_t * s); +wchar_t *uadv(wchar_t * s); +wchar_t *ustrcpy(wchar_t * dest, wchar_t * source); +wchar_t utolower(wchar_t); +int ustrcmp(wchar_t * lhs, wchar_t * rhs); +int ustricmp(wchar_t * lhs, wchar_t * rhs); +int utoi(wchar_t *); +int utob(wchar_t *); +int uisdigit(wchar_t); +wchar_t *ustrlow(wchar_t * s); +wchar_t *ustrftime(wchar_t * fmt, struct tm *timespec); + +/* + * help.c + */ +void usage(void); +void showversion(void); + +/* + * licence.c + */ +void licence(void); + +/* + * version.c + */ +const char *const version; + +/* + * misc.c + */ +typedef struct stackTag *stack; +stack stk_new(void); +void stk_free(stack); +void stk_push(stack, void *); +void *stk_pop(stack); + +typedef struct tagRdstring rdstring; +struct tagRdstring { + int pos, size; + wchar_t *text; +}; +typedef struct tagRdstringc rdstringc; +struct tagRdstringc { + int pos, size; + char *text; +}; +extern const rdstring empty_rdstring; +extern const rdstringc empty_rdstringc; +void rdadd(rdstring * rs, wchar_t c); +void rdadds(rdstring * rs, wchar_t * p); +wchar_t *rdtrim(rdstring * rs); +void rdaddc(rdstringc * rs, char c); +void rdaddsc(rdstringc * rs, char *p); +char *rdtrimc(rdstringc * rs); + +int compare_wordlists(word * a, word * b); + +void mark_attr_ends(paragraph * sourceform); + +typedef struct tagWrappedLine wrappedline; +struct tagWrappedLine { + wrappedline *next; + word *begin, *end; /* first & last 
words of line */ + int nspaces; /* number of whitespaces in line */ + int shortfall; /* how much shorter than max width */ +}; +wrappedline *wrap_para(word *, int, int, int (*)(word *)); +void wrap_free(wrappedline *); + +/* + * input.c + */ +paragraph *read_input(input * in, indexdata * idx); + +/* + * keywords.c + */ +struct keywordlist_Tag { + int nkeywords; + int size; + tree234 *keys; /* sorted by `key' field */ + word **looseends; /* non-keyword list element numbers */ + int nlooseends; + int looseendssize; +}; +struct keyword_Tag { + wchar_t *key; /* the keyword itself */ + word *text; /* "Chapter 2", "Appendix Q"... */ + /* (NB: filepos are not set) */ + paragraph *para; /* the paragraph referenced */ +}; +keyword *kw_lookup(keywordlist *, wchar_t *); +keywordlist *get_keywords(paragraph *); +void free_keywords(keywordlist *); +void subst_keywords(paragraph *, keywordlist *); + +/* + * index.c + */ + +/* + * Data structure to hold both sides of the index. + */ +struct indexdata_Tag { + tree234 *tags; /* holds type `indextag' */ + tree234 *entries; /* holds type `indexentry' */ +}; + +/* + * Data structure to hold an index tag (LHS of index). + */ +struct indextag_Tag { + wchar_t *name; + word *implicit_text; + word **explicit_texts; + int nexplicit, explicit_size; + int nrefs; + indexentry **refs; /* array of entries referenced by tag */ +}; + +/* + * Data structure to hold an index entry (RHS of index). 
+ */ +struct indexentry_Tag { + word *text; + void *backend_data; /* private to back end */ +}; + +indexdata *make_index(void); +void cleanup_index(indexdata *); +/* index_merge takes responsibility for freeing arg 3 iff implicit; never + * takes responsibility for arg 2 */ +void index_merge(indexdata *, int is_explicit, wchar_t *, word *); +void build_index(indexdata *); +void index_debug(indexdata *); +indextag *index_findtag(indexdata * idx, wchar_t * name); + +/* + * contents.c + */ +numberstate *number_init(void); +void number_cfg(numberstate *, paragraph *); +word *number_mktext(numberstate *, paragraph *, wchar_t *, int, int *); +void number_free(numberstate *); + +/* + * biblio.c + */ +void gen_citations(paragraph *, keywordlist *); + +/* + * style.c + */ +struct userstyle_Tag { + void* empty; +}; + +/* + * bk_xhtml.c + */ +void xhtml_backend(paragraph *, keywordlist *, indexdata *); + +#endif diff --git a/Docs/src/bin/halibut/help.c b/Docs/src/bin/halibut/help.c index bcb3701..7329494 100755 --- a/Docs/src/bin/halibut/help.c +++ b/Docs/src/bin/halibut/help.c @@ -1,23 +1,23 @@ -/*
- * help.c: usage instructions
- */
-
-#include <stdio.h>
-#include "halibut.h"
-
-static char *usagetext[] = {
- "halibut.exe file1 [file2 ...]",
- NULL
-};
-
-void usage(void)
-{
- char **p;
- for (p = usagetext; *p; p++)
- puts(*p);
-}
-
-void showversion(void)
-{
- printf("Halibut, %s\n", version);
-}
+/* + * help.c: usage instructions + */ + +#include <stdio.h> +#include "halibut.h" + +static char *usagetext[] = { + "halibut.exe file1 [file2 ...]", + NULL +}; + +void usage(void) +{ + char **p; + for (p = usagetext; *p; p++) + puts(*p); +} + +void showversion(void) +{ + printf("Halibut, %s\n", version); +} diff --git a/Docs/src/bin/halibut/index.c b/Docs/src/bin/halibut/index.c index 94ccf52..e95d67a 100755 --- a/Docs/src/bin/halibut/index.c +++ b/Docs/src/bin/halibut/index.c @@ -1,263 +1,263 @@ -/*
- * index.c: create and collate index data structures
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "halibut.h"
-
-static int compare_tags(void *av, void *bv);
-static int compare_entries(void *av, void *bv);
-
-indexdata *make_index(void)
-{
- indexdata *ret = mknew(indexdata);
- ret->tags = newtree234(compare_tags);
- ret->entries = newtree234(compare_entries);
- return ret;
-}
-
-static indextag *make_indextag(void)
-{
- indextag *ret = mknew(indextag);
- ret->name = NULL;
- ret->implicit_text = NULL;
- ret->explicit_texts = NULL;
- ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
- ret->refs = NULL;
- return ret;
-}
-
-static int compare_tags(void *av, void *bv)
-{
- indextag *a = (indextag *) av, *b = (indextag *) bv;
- return ustricmp(a->name, b->name);
-}
-
-static int compare_to_find_tag(void *av, void *bv)
-{
- wchar_t *a = (wchar_t *) av;
- indextag *b = (indextag *) bv;
- return ustricmp(a, b->name);
-}
-
-static int compare_entries(void *av, void *bv)
-{
- indexentry *a = (indexentry *) av, *b = (indexentry *) bv;
- return compare_wordlists(a->text, b->text);
-}
-
-/*
- * Back-end utility: find the indextag with a given name.
- */
-indextag *index_findtag(indexdata * idx, wchar_t * name)
-{
- return find234(idx->tags, name, compare_to_find_tag);
-}
-
-/*
- * Add a \IM. `tags' points to a zero-terminated chain of
- * zero-terminated strings ("first\0second\0thirdandlast\0\0").
- * `text' points to a word list.
- *
- * Guarantee on calling sequence: all implicit merges are given
- * before the explicit ones.
- */
-void
-index_merge(indexdata * idx, int is_explicit, wchar_t * tags, word * text)
-{
- indextag *t, *existing;
-
- /*
- * FIXME: want to warn on overlapping source sets.
- */
- for (; *tags; tags = uadv(tags))
- {
- t = make_indextag();
- t->name = tags;
- existing = add234(idx->tags, t);
- if (existing == t)
- {
- /*
- * Duplicate this so we can free it independently.
- */
- t->name = ustrdup(tags);
-
- /*
- * Every tag has an implicit \IM. So if this tag
- * doesn't exist and we're explicit, then we should
- * warn (and drop it, since it won't be referenced).
- */
- if (is_explicit)
- {
- error(err_nosuchidxtag, tags);
- continue;
- }
-
- /*
- * Otherwise, this is a new tag with an implicit \IM.
- */
- t->implicit_text = text;
- } else
- {
- sfree(t);
- t = existing;
- if (!is_explicit)
- {
- /*
- * An implicit \IM for a tag that's had an implicit
- * \IM before. FIXME: we should check the text
- * against the existing text and warn on
- * differences. And check the tag for case match
- * against the existing tag, likewise.
- */
- } else
- {
- /*
- * An explicit \IM added to a valid tag. In
- * particular, this removes the implicit \IM if
- * present.
- */
- if (t->implicit_text)
- {
- free_word_list(t->implicit_text);
- t->implicit_text = NULL;
- }
- if (t->nexplicit >= t->explicit_size)
- {
- t->explicit_size = t->nexplicit + 8;
- t->explicit_texts = resize(t->explicit_texts, t->explicit_size);
- }
- t->explicit_texts[t->nexplicit++] = text;
- }
- }
- }
-}
-
-/*
- * Build the final-form index. We now have every tag, with every
- * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
- * the RHSes of the \IMs, and sort by final form, and decorate the
- * entries in the original 2-3 tree with pointers to the RHS
- * entries.
- */
-void build_index(indexdata * i)
-{
- indextag *t;
- word **ta;
- int ti;
- int j;
-
- for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++)
- {
- if (t->implicit_text)
- {
- t->nrefs = 1;
- ta = &t->implicit_text;
- } else
- {
- t->nrefs = t->nexplicit;
- ta = t->explicit_texts;
- }
- if (t->nrefs)
- {
- t->refs = mknewa(indexentry *, t->nrefs);
- for (j = 0; j < t->nrefs; j++)
- {
- indexentry *ent = mknew(indexentry);
- ent->text = *ta++;
- t->refs[j] = add234(i->entries, ent);
- if (t->refs[j] != ent) /* duplicate */
- sfree(ent);
- }
- }
- }
-}
-
-void cleanup_index(indexdata * i)
-{
- indextag *t;
- indexentry *ent;
- int ti;
-
- for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++)
- {
- sfree(t->name);
- free_word_list(t->implicit_text);
- sfree(t->explicit_texts);
- sfree(t->refs);
- sfree(t);
- }
- freetree234(i->tags);
- for (ti = 0; (ent = (indexentry *) index234(i->entries, ti)) != NULL;
- ti++)
- {
- sfree(ent);
- }
- freetree234(i->entries);
- sfree(i);
-}
-
-static void dbg_prtwordlist(int level, word * w);
-static void dbg_prtmerge(int is_explicit, wchar_t * tag, word * text);
-
-void index_debug(indexdata * i)
-{
- indextag *t;
- indexentry *y;
- int ti;
- int j;
-
- printf("\nINDEX TAGS\n==========\n\n");
- for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++)
- {
- printf("\n");
- if (t->implicit_text)
- dbg_prtmerge(0, t->name, t->implicit_text);
- for (j = 0; j < t->nexplicit; j++)
- dbg_prtmerge(1, t->name, t->explicit_texts[j]);
- }
-
- printf("\nINDEX ENTRIES\n=============\n\n");
- for (ti = 0; (y = (indexentry *) index234(i->entries, ti)) != NULL; ti++)
- {
- printf("\n");
- printf("{\n");
- dbg_prtwordlist(1, y->text);
- printf("}\n");
- }
-}
-
-static void dbg_prtmerge(int is_explicit, wchar_t * tag, word * text)
-{
- printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
- for (; *tag; tag++)
- putchar(*tag);
- printf("\" {\n");
- dbg_prtwordlist(1, text);
- printf("}\n");
-}
-
-static void dbg_prtwordlist(int level, word * w)
-{
- for (; w; w = w->next)
- {
- wchar_t *wp;
- printf("%*sword %d ", level * 4, "", w->type);
- if (w->text)
- {
- printf("\"");
- for (wp = w->text; *wp; wp++)
- putchar(*wp);
- printf("\"");
- } else
- printf("(no text)");
- if (w->alt)
- {
- printf(" alt = {\n");
- dbg_prtwordlist(level + 1, w->alt);
- printf("%*s}", level * 4, "");
- }
- printf("\n");
- }
-}
+/* + * index.c: create and collate index data structures + */ + +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" + +static int compare_tags(void *av, void *bv); +static int compare_entries(void *av, void *bv); + +indexdata *make_index(void) +{ + indexdata *ret = mknew(indexdata); + ret->tags = newtree234(compare_tags); + ret->entries = newtree234(compare_entries); + return ret; +} + +static indextag *make_indextag(void) +{ + indextag *ret = mknew(indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; +} + +static int compare_tags(void *av, void *bv) +{ + indextag *a = (indextag *) av, *b = (indextag *) bv; + return ustricmp(a->name, b->name); +} + +static int compare_to_find_tag(void *av, void *bv) +{ + wchar_t *a = (wchar_t *) av; + indextag *b = (indextag *) bv; + return ustricmp(a, b->name); +} + +static int compare_entries(void *av, void *bv) +{ + indexentry *a = (indexentry *) av, *b = (indexentry *) bv; + return compare_wordlists(a->text, b->text); +} + +/* + * Back-end utility: find the indextag with a given name. + */ +indextag *index_findtag(indexdata * idx, wchar_t * name) +{ + return find234(idx->tags, name, compare_to_find_tag); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void +index_merge(indexdata * idx, int is_explicit, wchar_t * tags, word * text) +{ + indextag *t, *existing; + + /* + * FIXME: want to warn on overlapping source sets. + */ + for (; *tags; tags = uadv(tags)) + { + t = make_indextag(); + t->name = tags; + existing = add234(idx->tags, t); + if (existing == t) + { + /* + * Duplicate this so we can free it independently. 
+ */ + t->name = ustrdup(tags); + + /* + * Every tag has an implicit \IM. So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). + */ + if (is_explicit) + { + error(err_nosuchidxtag, tags); + continue; + } + + /* + * Otherwise, this is a new tag with an implicit \IM. + */ + t->implicit_text = text; + } else + { + sfree(t); + t = existing; + if (!is_explicit) + { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. + */ + } else + { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + if (t->implicit_text) + { + free_word_list(t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) + { + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = resize(t->explicit_texts, t->explicit_size); + } + t->explicit_texts[t->nexplicit++] = text; + } + } + } +} + +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. 
+ */ +void build_index(indexdata * i) +{ + indextag *t; + word **ta; + int ti; + int j; + + for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++) + { + if (t->implicit_text) + { + t->nrefs = 1; + ta = &t->implicit_text; + } else + { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + } + if (t->nrefs) + { + t->refs = mknewa(indexentry *, t->nrefs); + for (j = 0; j < t->nrefs; j++) + { + indexentry *ent = mknew(indexentry); + ent->text = *ta++; + t->refs[j] = add234(i->entries, ent); + if (t->refs[j] != ent) /* duplicate */ + sfree(ent); + } + } + } +} + +void cleanup_index(indexdata * i) +{ + indextag *t; + indexentry *ent; + int ti; + + for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++) + { + sfree(t->name); + free_word_list(t->implicit_text); + sfree(t->explicit_texts); + sfree(t->refs); + sfree(t); + } + freetree234(i->tags); + for (ti = 0; (ent = (indexentry *) index234(i->entries, ti)) != NULL; + ti++) + { + sfree(ent); + } + freetree234(i->entries); + sfree(i); +} + +static void dbg_prtwordlist(int level, word * w); +static void dbg_prtmerge(int is_explicit, wchar_t * tag, word * text); + +void index_debug(indexdata * i) +{ + indextag *t; + indexentry *y; + int ti; + int j; + + printf("\nINDEX TAGS\n==========\n\n"); + for (ti = 0; (t = (indextag *) index234(i->tags, ti)) != NULL; ti++) + { + printf("\n"); + if (t->implicit_text) + dbg_prtmerge(0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge(1, t->name, t->explicit_texts[j]); + } + + printf("\nINDEX ENTRIES\n=============\n\n"); + for (ti = 0; (y = (indexentry *) index234(i->entries, ti)) != NULL; ti++) + { + printf("\n"); + printf("{\n"); + dbg_prtwordlist(1, y->text); + printf("}\n"); + } +} + +static void dbg_prtmerge(int is_explicit, wchar_t * tag, word * text) +{ + printf("\\IM: %splicit: \"", is_explicit ? 
"ex" : "im"); + for (; *tag; tag++) + putchar(*tag); + printf("\" {\n"); + dbg_prtwordlist(1, text); + printf("}\n"); +} + +static void dbg_prtwordlist(int level, word * w) +{ + for (; w; w = w->next) + { + wchar_t *wp; + printf("%*sword %d ", level * 4, "", w->type); + if (w->text) + { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) + { + printf(" alt = {\n"); + dbg_prtwordlist(level + 1, w->alt); + printf("%*s}", level * 4, ""); + } + printf("\n"); + } +} diff --git a/Docs/src/bin/halibut/input.c b/Docs/src/bin/halibut/input.c index f8e4f71..c14f10e 100755 --- a/Docs/src/bin/halibut/input.c +++ b/Docs/src/bin/halibut/input.c @@ -1,1488 +1,1488 @@ -/*
- * input.c: read the source form
- */
-
-#include <stdio.h>
-#include <assert.h>
-#include <time.h>
-#include "halibut.h"
-
-#define TAB_STOP 8 /* for column number tracking */
-
-static void setpos(input * in, char *fname)
-{
- in->pos.filename = fname;
- in->pos.line = 1;
- in->pos.col = (in->reportcols ? 1 : -1);
-}
-
-static void unget(input * in, int c, filepos * pos)
-{
- if (in->npushback >= in->pushbacksize)
- {
- in->pushbacksize = in->npushback + 16;
- in->pushback = resize(in->pushback, in->pushbacksize);
- }
- in->pushback[in->npushback].chr = c;
- in->pushback[in->npushback].pos = *pos; /* structure copy */
- in->npushback++;
-}
-
-/* ---------------------------------------------------------------------- */
-/*
- * Macro subsystem
- */
-typedef struct macro_Tag macro;
-struct macro_Tag {
- wchar_t *name, *text;
-};
-struct macrostack_Tag {
- macrostack *next;
- wchar_t *text;
- int ptr, npushback;
- filepos pos;
-};
-static int macrocmp(void *av, void *bv)
-{
- macro *a = (macro *) av, *b = (macro *) bv;
- return ustrcmp(a->name, b->name);
-}
-static void
-macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos)
-{
- macro *m = mknew(macro);
- m->name = name;
- m->text = text;
- if (add234(macros, m) != m)
- {
- error(err_macroexists, &fpos, name);
- sfree(name);
- sfree(text);
- }
-}
-static int
-macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos)
-{
- macro m, *gotit;
- m.name = name;
- gotit = find234(macros, &m, NULL);
- if (gotit)
- {
- macrostack *expansion = mknew(macrostack);
- expansion->next = in->stack;
- expansion->text = gotit->text;
- expansion->pos = *pos; /* structure copy */
- expansion->ptr = 0;
- expansion->npushback = in->npushback;
- in->stack = expansion;
- return TRUE;
- } else
- return FALSE;
-}
-static void macrocleanup(tree234 * macros)
-{
- int ti;
- macro *m;
- for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++)
- {
- sfree(m->name);
- sfree(m->text);
- sfree(m);
- }
- freetree234(macros);
-}
-
-/*
- * Can return EOF
- */
-static int get(input * in, filepos * pos)
-{
- int pushbackpt = in->stack ? in->stack->npushback : 0;
- if (in->npushback > pushbackpt)
- {
- --in->npushback;
- if (pos)
- *pos = in->pushback[in->npushback].pos; /* structure copy */
- return in->pushback[in->npushback].chr;
- } else if (in->stack)
- {
- wchar_t c = in->stack->text[in->stack->ptr];
- if (in->stack->text[++in->stack->ptr] == L'\0')
- {
- macrostack *tmp = in->stack;
- in->stack = tmp->next;
- sfree(tmp);
- }
- return c;
- } else if (in->currfp)
- {
- int c = getc(in->currfp);
-
- if (c == EOF)
- {
- fclose(in->currfp);
- in->currfp = NULL;
- }
- /* Track line numbers, for error reporting */
- if (pos)
- *pos = in->pos;
- if (in->reportcols)
- {
- switch (c)
- {
- case '\t':
- in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP;
- break;
- case '\n':
- in->pos.col = 1;
- in->pos.line++;
- break;
- default:
- in->pos.col++;
- break;
- }
- } else
- {
- in->pos.col = -1;
- if (c == '\n')
- in->pos.line++;
- }
- /* FIXME: do input charmap translation. We should be returning
- * Unicode here. */
- return c;
- } else
- return EOF;
-}
-
-/*
- * Lexical analysis of source files.
- */
-typedef struct token_Tag token;
-struct token_Tag {
- int type;
- int cmd, aux;
- wchar_t *text;
- filepos pos;
-};
-enum {
- tok_eof, /* end of file */
- tok_eop, /* end of paragraph */
- tok_white, /* whitespace */
- tok_word, /* a word or word fragment */
- tok_cmd, /* \command */
- tok_lbrace, /* { */
- tok_rbrace /* } */
-};
-
-/* Halibut command keywords. */
-enum {
- c__invalid, /* invalid command */
- c__comment, /* comment command (\#) */
- c__escaped, /* escaped character */
- c__nbsp, /* nonbreaking space */
- c_A, /* appendix heading */
- c_B, /* bibliography entry */
- c_BR, /* bibliography rewrite */
- c_C, /* chapter heading */
- c_H, /* heading */
- c_I, /* invisible index mark */
- c_IM, /* index merge/rewrite */
- c_K, /* capitalised cross-reference */
- c_S, /* aux field is 0, 1, 2, ... */
- c_U, /* unnumbered-chapter heading */
- c_W, /* Web hyperlink */
- c_L, /* Relative/local hyperlink */
- c_b, /* bulletted list */
- c_c, /* code */
- c_cfg, /* configuration directive */
- c_copyright, /* copyright statement */
- c_cw, /* weak code */
- c_date, /* document processing date */
- c_define, /* macro definition */
- c_e, /* emphasis */
- c_i, /* visible index mark */
- c_ii, /* uncapitalised visible index mark */
- c_k, /* uncapitalised cross-reference */
- c_R, /* free text cross-reference */
- c_n, /* numbered list */
- c_nocite, /* bibliography trickery */
- c_preamble, /* document preamble text */
- c_q, /* quote marks */
- c_rule, /* horizontal rule */
- c_title, /* document title */
- c_u, /* aux field is char code */
- c_versionid /* document RCS id */
-};
-
-/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
-#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
-#define isnl(c) ( (c)==10 )
-#define isdec(c) ( ((c)>='0'&&(c)<='9') )
-#define fromdec(c) ( (c)-'0' )
-#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
-#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
-#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
-
-/*
- * Keyword comparison function. Like strcmp, but between a wchar_t *
- * and a char *.
- */
-static int kwcmp(wchar_t const *p, char const *q)
-{
- int i;
- do
- {
- i = *p - *q;
- }
- while (*p++ && *q++ && !i);
- return i;
-}
-
-/*
- * Match a keyword.
- */
-static void match_kw(token * tok)
-{
- /*
- * FIXME. The ids are explicit in here so as to allow long-name
- * equivalents to the various very short keywords.
- *
- * This list must be sorted, it's searched using binary search.
- */
- static const struct {
- char const *name;
- int id;
- } keywords[] = {
- {
- "#", c__comment}
- , /* comment command (\#) */
- {
- "-", c__escaped}
- , /* nonbreaking hyphen */
- {
- "A", c_A}
- , /* appendix heading */
- {
- "B", c_B}
- , /* bibliography entry */
- {
- "BR", c_BR}
- , /* bibliography rewrite */
- {
- "C", c_C}
- , /* chapter heading */
- {
- "H", c_H}
- , /* heading */
- {
- "I", c_I}
- , /* invisible index mark */
- {
- "IM", c_IM}
- , /* index merge/rewrite */
- {
- "K", c_K}
- , /* capitalised cross-reference */
- {
- "L", c_L}
- , /* Relative/local hyperlink */
- {
- "R", c_R}
- , /* free text cross-reference */
- {
- "U", c_U}
- , /* unnumbered-chapter heading */
- {
- "W", c_W}
- , /* Web hyperlink */
- {
- "\\", c__escaped}
- , /* escaped backslash (\\) */
- {
- "_", c__nbsp}
- , /* nonbreaking space (\_) */
- {
- "b", c_b}
- , /* bulletted list */
- {
- "c", c_c}
- , /* code */
- {
- "cfg", c_cfg}
- , /* configuration directive */
- {
- "copyright", c_copyright}
- , /* copyright statement */
- {
- "cw", c_cw}
- , /* weak code */
- {
- "date", c_date}
- , /* document processing date */
- {
- "define", c_define}
- , /* macro definition */
- {
- "e", c_e}
- , /* emphasis */
- {
- "i", c_i}
- , /* visible index mark */
- {
- "ii", c_ii}
- , /* uncapitalised visible index mark */
- {
- "k", c_k}
- , /* uncapitalised cross-reference */
- {
- "n", c_n}
- , /* numbered list */
- {
- "nocite", c_nocite}
- , /* bibliography trickery */
- {
- "preamble", c_preamble}
- , /* document preamble text */
- {
- "q", c_q}
- , /* quote marks */
- {
- "rule", c_rule}
- , /* horizontal rule */
- {
- "title", c_title}
- , /* document title */
- {
- "versionid", c_versionid}
- , /* document RCS id */
- {
- "{", c__escaped}
- , /* escaped lbrace (\{) */
- {
- "}", c__escaped}
- , /* escaped rbrace (\}) */
- };
- int i, j, k, c;
-
- /*
- * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
- * doesn't match correctly, we just fall through to the
- * binary-search phase.
- */
- if (tok->text[0] == 'S')
- {
- /* We expect numeric characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n;
- if (!*p)
- n = 1;
- else
- {
- n = 0;
- while (*p && isdec(*p))
- {
- n = 10 * n + fromdec(*p);
- p++;
- }
- }
- if (!*p)
- {
- tok->cmd = c_S;
- tok->aux = n;
- return;
- }
- } else if (tok->text[0] == 'u')
- {
- /* We expect hex characters thereafter. */
- wchar_t *p = tok->text + 1;
- int n = 0;
- while (*p && ishex(*p))
- {
- n = 16 * n + fromhex(*p);
- p++;
- }
- if (!*p)
- {
- tok->cmd = c_u;
- tok->aux = n;
- return;
- }
- }
-
- i = -1;
- j = sizeof(keywords) / sizeof(*keywords);
- while (j - i > 1)
- {
- k = (i + j) / 2;
- c = kwcmp(tok->text, keywords[k].name);
- if (c < 0)
- j = k;
- else if (c > 0)
- i = k;
- else
- { /* c == 0 */
-
- tok->cmd = keywords[k].id;
- return;
- }
- }
-
- tok->cmd = c__invalid;
-}
-
-
-/*
- * Read a token from the input file, in the normal way (`normal' in
- * the sense that code paragraphs work a different way).
- */
-token get_token(input * in)
-{
- int c;
- int nls;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.cmd = c__invalid;
- ret.aux = FALSE;
- ret.text = NULL; /* default */
- c = get(in, &cpos);
- ret.pos = cpos;
- if (iswhite(c))
- { /* tok_white or tok_eop */
- nls = 0;
- do
- {
- if (isnl(c))
- nls++;
- }
- while ((c = get(in, &cpos)) != EOF && iswhite(c));
- if (c == EOF)
- {
- ret.type = tok_eof;
- return ret;
- }
- unget(in, c, &cpos);
- ret.type = (nls > 1 ? tok_eop : tok_white);
- return ret;
- } else if (c == EOF)
- { /* tok_eof */
- ret.type = tok_eof;
- return ret;
- } else if (c == '\\')
- { /* tok_cmd */
- c = get(in, &cpos);
- if (c == '-' || c == '\\' || c == '_' ||
- c == '#' || c == '{' || c == '}')
- {
- /* single-char command */
- rdadd(&rs, (wchar_t)c);
- } else if (c == 'u')
- {
- int len = 0;
- do
- {
- rdadd(&rs, (wchar_t)c);
- len++;
- c = get(in, &cpos);
- }
- while (ishex(c) && len < 5);
- unget(in, c, &cpos);
- } else if (iscmd(c))
- {
- do
- {
- rdadd(&rs, (wchar_t)c);
- c = get(in, &cpos);
- }
- while (iscmd(c));
- unget(in, c, &cpos);
- }
- /*
- * Now match the command against the list of available
- * ones.
- */
- ret.type = tok_cmd;
- ret.text = ustrdup(rs.text);
- match_kw(&ret);
- sfree(rs.text);
- return ret;
- } else if (c == '{')
- { /* tok_lbrace */
- ret.type = tok_lbrace;
- return ret;
- } else if (c == '}')
- { /* tok_rbrace */
- ret.type = tok_rbrace;
- return ret;
- } else
- { /* tok_word */
- /*
- * Read a word: the longest possible contiguous sequence of
- * things other than whitespace, backslash, braces and
- * hyphen. A hyphen terminates the word but is returned as
- * part of it; everything else is pushed back for the next
- * token. The `aux' field contains TRUE if the word ends in
- * a hyphen.
- */
- ret.aux = FALSE; /* assumed for now */
- while (1)
- {
- if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF)
- {
- /* Put back the character that caused termination */
- unget(in, c, &cpos);
- break;
- } else
- {
- rdadd(&rs, (wchar_t)c);
- if (c == '-')
- {
- ret.aux = TRUE;
- break; /* hyphen terminates word */
- }
- }
- c = get(in, &cpos);
- }
- ret.type = tok_word;
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
- }
-}
-
-/*
- * Determine whether the next input character is an open brace (for
- * telling code paragraphs from paragraphs which merely start with
- * code).
- */
-int isbrace(input * in)
-{
- int c;
- filepos cpos;
-
- c = get(in, &cpos);
- unget(in, c, &cpos);
- return (c == '{');
-}
-
-/*
- * Read the rest of a line that starts `\c'. Including nothing at
- * all (tok_word with empty text).
- */
-token get_codepar_token(input * in)
-{
- int c;
- token ret;
- rdstring rs = { 0, 0, NULL };
- filepos cpos;
-
- ret.type = tok_word;
- c = get(in, &cpos); /* expect (and discard) one space */
- ret.pos = cpos;
- if (c == ' ')
- {
- c = get(in, &cpos);
- ret.pos = cpos;
- }
- while (!isnl(c) && c != EOF)
- {
- int c2 = c;
- c = get(in, &cpos);
- /* Discard \r just before \n. */
- if (c2 != 13 || !isnl(c))
- rdadd(&rs, (wchar_t)c2);
- }
- unget(in, c, &cpos);
- ret.text = ustrdup(rs.text);
- sfree(rs.text);
- return ret;
-}
-
-/*
- * Adds a new word to a linked list
- */
-static word *addword(word newword, word *** hptrptr)
-{
- word *mnewword;
- if (!hptrptr)
- return NULL;
- mnewword = mknew(word);
- *mnewword = newword; /* structure copy */
- mnewword->next = NULL;
- **hptrptr = mnewword;
- *hptrptr = &mnewword->next;
- return mnewword;
-}
-
-/*
- * Adds a new paragraph to a linked list
- */
-static paragraph *addpara(paragraph newpara, paragraph *** hptrptr)
-{
- paragraph *mnewpara = mknew(paragraph);
- *mnewpara = newpara; /* structure copy */
- mnewpara->next = NULL;
- **hptrptr = mnewpara;
- *hptrptr = &mnewpara->next;
- return mnewpara;
-}
-
-/*
- * Destructor before token is reassigned; should catch most memory
- * leaks
- */
-#define dtor(t) ( sfree(t.text) )
-
-/*
- * Reads a single file (ie until get() returns EOF)
- */
-static void read_file(paragraph *** ret, input * in, indexdata * idx)
-{
- token t;
- paragraph par;
- word wd, **whptr, **idximplicit;
- tree234 *macros;
- wchar_t utext[2], *wdtext;
- int style, spcstyle;
- int already;
- int iswhite, seenwhite;
- int type;
- struct stack_item {
- enum {
- stack_nop = 0, /* do nothing (for error recovery) */
- stack_ualt = 1, /* \u alternative */
- stack_style = 2, /* \e, \c, \cw */
- stack_idx = 4, /* \I, \i, \ii */
- stack_hyper = 8, /* \W */
- stack_quote = 16, /* \q */
- } type;
- word **whptr; /* to restore from \u alternatives */
- word **idximplicit; /* to restore from \u alternatives */
- } *sitem;
- stack parsestk;
- word *indexword=NULL, *uword=NULL, *iword=NULL;
- word *idxwordlist;
- rdstring indexstr;
- int index_downcase=0, index_visible=0, indexing=0;
- const rdstring nullrs = { 0, 0, NULL };
- wchar_t uchr;
-
- t.text = NULL;
- macros = newtree234(macrocmp);
- already = FALSE;
-
- /*
- * Loop on each paragraph.
- */
- while (1)
- {
- int start_cmd = c__invalid;
- par.words = NULL;
- par.keyword = NULL;
- whptr = &par.words;
-
- /*
- * Get a token.
- */
- if (!already)
- {
- dtor(t), t = get_token(in);
- }
- already = FALSE;
- if (t.type == tok_eof)
- break;
-
- /*
- * Parse code paragraphs separately.
- */
- if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in))
- {
- par.type = para_Code;
- par.fpos = t.pos;
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- wd.type = word_WeakCode;
- wd.breaks = FALSE; /* shouldn't need this... */
- wd.text = ustrdup(t.text);
- wd.alt = NULL;
- wd.fpos = t.pos;
- addword(wd, &whptr);
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- /*
- * The newline after a code-paragraph line
- */
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_eop || t.type == tok_eof)
- break;
- else if (t.type != tok_cmd || t.cmd != c_c)
- {
- error(err_brokencodepara, &t.pos);
- addpara(par, ret);
- while (t.type != tok_eop) /* error recovery: */
- dtor(t), t = get_token(in); /* eat rest of paragraph */
- goto codeparabroken; /* ick, but such is life */
- }
- }
- addpara(par, ret);
- codeparabroken:
- continue;
- }
-
- while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos))
- {
- dtor(t), t = get_token(in);
- }
-
-
- /*
- * This token begins a paragraph. See if it's one of the
- * special commands that define a paragraph type.
- *
- * (note that \# is special in a way, and \nocite takes no
- * text)
- */
- par.type = para_Normal;
- if (t.type == tok_cmd)
- {
- int needkw=0;
- int is_macro = FALSE;
-
- par.fpos = t.pos;
- switch (t.cmd)
- {
- default:
- needkw = -1;
- break;
- case c__invalid:
- error(err_badparatype, t.text, &t.pos);
- needkw = 4;
- break;
- case c__comment:
- if (isbrace(in))
- break; /* `\#{': isn't a comment para */
- do
- {
- dtor(t), t = get_token(in);
- }
- while (t.type != tok_eop && t.type != tok_eof);
- continue; /* next paragraph */
- /*
- * `needkw' values:
- *
- * 1 -- exactly one keyword
- * 2 -- at least one keyword
- * 4 -- any number of keywords including zero
- * 8 -- at least one keyword and then nothing else
- * 16 -- nothing at all! no keywords, no body
- * 32 -- no keywords at all
- */
- case c_A:
- needkw = 2;
- par.type = para_Appendix;
- break;
- case c_B:
- needkw = 2;
- par.type = para_Biblio;
- break;
- case c_BR:
- needkw = 1;
- par.type = para_BR;
- start_cmd = c_BR;
- break;
- case c_C:
- needkw = 2;
- par.type = para_Chapter;
- break;
- case c_H:
- needkw = 2;
- par.type = para_Heading;
- par.aux = 0;
- break;
- case c_IM:
- needkw = 2;
- par.type = para_IM;
- start_cmd = c_IM;
- break;
- case c_S:
- needkw = 2;
- par.type = para_Subsect;
- par.aux = t.aux;
- break;
- case c_U:
- needkw = 32;
- par.type = para_UnnumberedChapter;
- break;
- /* For \b and \n the keyword is optional */
- case c_b:
- needkw = 4;
- par.type = para_Bullet;
- break;
- case c_n:
- needkw = 4;
- par.type = para_NumberedList;
- break;
- case c_cfg:
- needkw = 8;
- par.type = para_Config;
- start_cmd = c_cfg;
- break;
- case c_copyright:
- needkw = 32;
- par.type = para_Copyright;
- break;
- case c_define:
- is_macro = TRUE;
- needkw = 1;
- break;
- /* For \nocite the keyword is _everything_ */
- case c_nocite:
- needkw = 8;
- par.type = para_NoCite;
- break;
- case c_preamble:
- needkw = 32;
- par.type = para_Preamble;
- break;
- case c_rule:
- needkw = 16;
- par.type = para_Rule;
- break;
- case c_title:
- needkw = 32;
- par.type = para_Title;
- break;
- case c_versionid:
- needkw = 32;
- par.type = para_VersionID;
- break;
- }
-
- if (needkw > 0)
- {
- rdstring rs = { 0, 0, NULL };
- int nkeys = 0;
- filepos fp;
-
- /* Get keywords. */
- dtor(t), t = get_token(in);
- fp = t.pos;
- while (t.type == tok_lbrace)
- {
- /* This is a keyword. */
- nkeys++;
- /* FIXME: there will be bugs if anyone specifies an
- * empty keyword (\foo{}), so trap this case. */
- while (dtor(t), t = get_token(in),
- t.type == tok_word ||
- t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp) ||
- (t.type == tok_cmd && t.cmd == c__escaped))
- {
- if (t.type == tok_white ||
- (t.type == tok_cmd && t.cmd == c__nbsp))
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (t.type != tok_rbrace)
- {
- error(err_kwunclosed, &t.pos);
- continue;
- }
- rdadd(&rs, 0); /* add string terminator */
- dtor(t), t = get_token(in); /* eat right brace */
- }
-
- rdadd(&rs, 0); /* add string terminator */
-
- /* See whether we have the right number of keywords. */
- if ((needkw & 48) && nkeys > 0)
- error(err_kwillegal, &fp);
- if ((needkw & 11) && nkeys == 0)
- error(err_kwexpected, &fp);
- if ((needkw & 5) && nkeys > 1)
- error(err_kwtoomany, &fp);
-
- if (is_macro)
- {
- /*
- * Macro definition. Get the rest of the line
- * as a code-paragraph token, repeatedly until
- * there's nothing more left of it. Separate
- * with newlines.
- */
- rdstring macrotext = { 0, 0, NULL };
- while (1)
- {
- dtor(t), t = get_codepar_token(in);
- if (macrotext.pos > 0)
- rdadd(¯otext, L'\n');
- rdadds(¯otext, t.text);
- dtor(t), t = get_token(in);
- if (t.type == tok_eop)
- break;
- }
- macrodef(macros, rs.text, macrotext.text, fp);
- continue; /* next paragraph */
- }
-
- par.keyword = rdtrim(&rs);
-
- /* Move to EOP in case of needkw==8 or 16 (no body) */
- if (needkw & 24)
- {
- /* We allow whitespace even when we expect no para body */
- while (t.type == tok_white)
- dtor(t), t = get_token(in);
- if (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- {
- error(err_bodyillegal, &t.pos);
- /* Error recovery: eat the rest of the paragraph */
- while (t.type != tok_eop && t.type != tok_eof &&
- (start_cmd == c__invalid ||
- t.type != tok_cmd || t.cmd != start_cmd))
- dtor(t), t = get_token(in);
- }
- if (t.type == tok_cmd)
- already = TRUE; /* inhibit get_token at top of loop */
- addpara(par, ret);
- continue; /* next paragraph */
- }
- }
- }
-
- /*
- * Now read the actual paragraph, word by word, adding to
- * the paragraph list.
- *
- * Mid-paragraph commands:
- *
- * \K \k
- * \c \cw
- * \e
- * \i \ii
- * \I
- * \u
- * \W
- * \date
- * \\ \{ \}
- */
- parsestk = stk_new();
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- indexing = FALSE;
- seenwhite = TRUE;
- while (t.type != tok_eop && t.type != tok_eof)
- {
- iswhite = FALSE;
- already = FALSE;
-
- /* Handle implicit paragraph breaks after \IM, \BR etc */
- if (start_cmd != c__invalid &&
- t.type == tok_cmd && t.cmd == start_cmd)
- {
- already = TRUE; /* inhibit get_token at top of loop */
- break;
- }
-
- if (t.type == tok_cmd && t.cmd == c__escaped)
- {
- t.type = tok_word; /* nice and simple */
- t.aux = 0; /* even if `\-' - nonbreaking! */
- }
- if (t.type == tok_cmd && t.cmd == c__nbsp)
- {
- t.type = tok_word; /* nice and simple */
- sfree(t.text);
- t.text = ustrdup(L" "); /* text is ` ' not `_' */
- t.aux = 0; /* (nonbreaking) */
- }
- switch (t.type)
- {
- case tok_white:
- if (whptr == &par.words)
- break; /* strip whitespace at start of para */
- wd.text = NULL;
- wd.type = spcstyle;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
-
- /*
- * Inhibit use of whitespace if it's (probably the
- * newline) before a repeat \IM / \BR type
- * directive.
- */
- if (start_cmd != c__invalid)
- {
- dtor(t), t = get_token(in);
- already = TRUE;
- if (t.type == tok_cmd && t.cmd == start_cmd)
- break;
- }
-
- if (indexing)
- rdadd(&indexstr, ' ');
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- iswhite = TRUE;
- break;
- case tok_word:
- if (indexing)
- rdadds(&indexstr, t.text);
- wd.type = style;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = t.aux;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(t.text);
- addword(wd, &idximplicit);
- }
- break;
- case tok_lbrace:
- error(err_unexbrace, &t.pos);
- /* Error recovery: push nop */
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- break;
- case tok_rbrace:
- sitem = stk_pop(parsestk);
- if (!sitem)
- error(err_unexbrace, &t.pos);
- else
- {
- if (sitem->type & stack_ualt)
- {
- whptr = sitem->whptr;
- idximplicit = sitem->idximplicit;
- }
- if (sitem->type & stack_style)
- {
- style = word_Normal;
- spcstyle = word_WhiteSpace;
- }
- if (sitem->type & stack_idx ) {
- indexword->text = ustrdup(indexstr.text);
- if (index_downcase)
- ustrlow(indexword->text);
- indexing = FALSE;
- rdadd(&indexstr, L'\0');
- index_merge(idx, FALSE, indexstr.text, idxwordlist);
- sfree(indexstr.text);
- }
- if (sitem->type & stack_hyper)
- {
- wd.text = NULL;
- wd.type = word_HyperEnd;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- addword(wd, &idximplicit);
- }
- if (sitem->type & stack_quote)
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Close;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- }
- }
- sfree(sitem);
- break;
- case tok_cmd:
- switch (t.cmd)
- {
- case c__comment:
- /*
- * In-paragraph comment: \#{ balanced braces }
- *
- * Anything goes here; even tok_eop. We should
- * eat whitespace after the close brace _if_
- * there was whitespace before the \#.
- */
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- int braces = 1;
- while (braces > 0)
- {
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- braces++;
- else if (t.type == tok_rbrace)
- braces--;
- else if (t.type == tok_eof)
- {
- error(err_commenteof, &t.pos);
- break;
- }
- }
- }
- if (seenwhite)
- {
- already = TRUE;
- dtor(t), t = get_token(in);
- if (t.type == tok_white)
- {
- iswhite = TRUE;
- already = FALSE;
- }
- }
- break;
- case c_q:
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- wd.text = NULL;
- wd.type = toquotestyle(style);
- wd.alt = NULL;
- wd.aux = quote_Open;
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (!indexing || index_visible)
- addword(wd, &whptr);
- if (indexing)
- {
- rdadd(&indexstr, L'"');
- addword(wd, &idximplicit);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_quote;
- stk_push(parsestk, sitem);
- }
- break;
- case c_K:
- case c_k:
- case c_R:
- case c_W:
- case c_L:
- case c_date:
- /*
- * Keyword, hyperlink, or \date. We expect a
- * left brace, some text, and then a right
- * brace. No nesting; no arguments.
- */
- wd.fpos = t.pos;
- wd.breaks = FALSE;
- if (t.cmd == c_K)
- wd.type = word_UpperXref;
- else if (t.cmd == c_k)
- wd.type = word_LowerXref;
- else if (t.cmd == c_R)
- wd.type = word_FreeTextXref;
- else if (t.cmd == c_W)
- wd.type = word_HyperLink;
- else if (t.cmd == c_L)
- wd.type = word_LocalHyperLink;
- else
- wd.type = word_Normal;
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- already = TRUE;
- wdtext = ustrftime(NULL, broken);
- wd.type = style;
- } else
- {
- error(err_explbr, &t.pos);
- wdtext = NULL;
- }
- } else
- {
- rdstring rs = { 0, 0, NULL };
- while (dtor(t), t = get_token(in),
- t.type == tok_word || t.type == tok_white)
- {
- if (t.type == tok_white)
- rdadd(&rs, ' ');
- else
- rdadds(&rs, t.text);
- }
- if (wd.type == word_Normal)
- {
- time_t thetime = time(NULL);
- struct tm *broken = localtime(&thetime);
- wdtext = ustrftime(rs.text, broken);
- wd.type = style;
- } else
- {
- wdtext = ustrdup(rs.text);
- }
- sfree(rs.text);
- if (t.type != tok_rbrace)
- {
- error(err_kwexprbr, &t.pos);
- }
- }
- wd.alt = NULL;
- wd.aux = 0;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &whptr);
- }
- if (indexing)
- {
- wd.text = ustrdup(wdtext);
- addword(wd, &idximplicit);
- }
- sfree(wdtext);
- if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink)
- {
- /*
- * Hyperlinks are different: they then
- * expect another left brace, to begin
- * delimiting the text marked by the link.
- */
- dtor(t), t = get_token(in);
- /*
- * Special cases: \W{}\c, \W{}\e, \W{}\cw
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_hyper;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- sfree(sitem);
- } else
- {
- stk_push(parsestk, sitem);
- }
- }
- break;
- case c_c:
- case c_cw:
- case c_e:
- type = t.cmd;
- if (style != word_Normal)
- {
- error(err_nestedstyles, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- dtor(t), t = get_token(in);
- if (t.type != tok_lbrace)
- {
- error(err_explbr, &t.pos);
- } else
- {
- style = (type == c_c ? word_Code :
- type == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem = mknew(struct stack_item);
- sitem->type = stack_style;
- stk_push(parsestk, sitem);
- }
- break;
- case c_i:
- case c_ii:
- case c_I:
- type = t.cmd;
- if (indexing)
- {
- error(err_nestedindex, &t.pos);
- /* Error recovery: eat lbrace, push nop. */
- dtor(t), t = get_token(in);
- sitem = mknew(struct stack_item);
- sitem->type = stack_nop;
- stk_push(parsestk, sitem);
- }
- sitem = mknew(struct stack_item);
- sitem->type = stack_idx;
- dtor(t), t = get_token(in);
- /*
- * Special cases: \i\c, \i\e, \i\cw
- */
- wd.fpos = t.pos;
- if (t.type == tok_cmd &&
- (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw))
- {
- if (style != word_Normal)
- error(err_nestedstyles, &t.pos);
- else
- {
- style = (t.cmd == c_c ? word_Code :
- t.cmd == c_cw ? word_WeakCode : word_Emph);
- spcstyle = tospacestyle(style);
- sitem->type |= stack_style;
- }
- dtor(t), t = get_token(in);
- }
- if (t.type != tok_lbrace)
- {
- sfree(sitem);
- error(err_explbr, &t.pos);
- } else
- {
- /* Add an index-reference word with no text as yet */
- wd.type = word_IndexRef;
- wd.text = NULL;
- wd.alt = NULL;
- wd.aux = 0;
- wd.breaks = FALSE;
- indexword = addword(wd, &whptr);
- /* Set up a rdstring to read the index text */
- indexstr = nullrs;
- /* Flags so that we do the Right Things with text */
- index_visible = (type != c_I);
- index_downcase = (type == c_ii);
- indexing = TRUE;
- idxwordlist = NULL;
- idximplicit = &idxwordlist;
- /* Stack item to close the indexing on exit */
- stk_push(parsestk, sitem);
- }
- break;
- case c_u:
- uchr = t.aux;
- utext[0] = uchr;
- utext[1] = 0;
- wd.type = style;
- wd.breaks = FALSE;
- wd.alt = NULL;
- wd.aux = 0;
- wd.fpos = t.pos;
- if (!indexing || index_visible)
- {
- wd.text = ustrdup(utext);
- uword = addword(wd, &whptr);
- } else
- uword = NULL;
- if (indexing)
- {
- wd.text = ustrdup(utext);
- iword = addword(wd, &idximplicit);
- } else
- iword = NULL;
- dtor(t), t = get_token(in);
- if (t.type == tok_lbrace)
- {
- /*
- * \u with a left brace. Until the brace
- * closes, all further words go on a
- * sidetrack from the main thread of the
- * paragraph.
- */
- sitem = mknew(struct stack_item);
- sitem->type = stack_ualt;
- sitem->whptr = whptr;
- sitem->idximplicit = idximplicit;
- stk_push(parsestk, sitem);
- whptr = uword ? &uword->alt : NULL;
- idximplicit = iword ? &iword->alt : NULL;
- } else
- {
- if (indexing)
- rdadd(&indexstr, uchr);
- already = TRUE;
- }
- break;
- default:
- if (!macrolookup(macros, in, t.text, &t.pos))
- error(err_badmidcmd, t.text, &t.pos);
- break;
- }
- }
- if (!already)
- dtor(t), t = get_token(in);
- seenwhite = iswhite;
- }
- /* Check the stack is empty */
- if (NULL != (sitem = stk_pop(parsestk)))
- {
- do
- {
- sfree(sitem);
- sitem = stk_pop(parsestk);
- }
- while (sitem);
- error(err_missingrbrace, &t.pos);
- }
- stk_free(parsestk);
- addpara(par, ret);
- }
-
- /*
- * We break to here rather than returning, because otherwise
- * this cleanup doesn't happen.
- */
- dtor(t);
- macrocleanup(macros);
-}
-
-paragraph *read_input(input * in, indexdata * idx)
-{
- paragraph *head = NULL;
- paragraph **hptr = &head;
-
- while (in->currindex < in->nfiles)
- {
- in->currfp = fopen(in->filenames[in->currindex], "r");
- if (in->currfp)
- {
- setpos(in, in->filenames[in->currindex]);
- read_file(&hptr, in, idx);
- }
- in->currindex++;
- }
-
- return head;
-}
+/* + * input.c: read the source form + */ + +#include <stdio.h> +#include <assert.h> +#include <time.h> +#include "halibut.h" + +#define TAB_STOP 8 /* for column number tracking */ + +static void setpos(input * in, char *fname) +{ + in->pos.filename = fname; + in->pos.line = 1; + in->pos.col = (in->reportcols ? 1 : -1); +} + +static void unget(input * in, int c, filepos * pos) +{ + if (in->npushback >= in->pushbacksize) + { + in->pushbacksize = in->npushback + 16; + in->pushback = resize(in->pushback, in->pushbacksize); + } + in->pushback[in->npushback].chr = c; + in->pushback[in->npushback].pos = *pos; /* structure copy */ + in->npushback++; +} + +/* ---------------------------------------------------------------------- */ +/* + * Macro subsystem + */ +typedef struct macro_Tag macro; +struct macro_Tag { + wchar_t *name, *text; +}; +struct macrostack_Tag { + macrostack *next; + wchar_t *text; + int ptr, npushback; + filepos pos; +}; +static int macrocmp(void *av, void *bv) +{ + macro *a = (macro *) av, *b = (macro *) bv; + return ustrcmp(a->name, b->name); +} +static void +macrodef(tree234 * macros, wchar_t * name, wchar_t * text, filepos fpos) +{ + macro *m = mknew(macro); + m->name = name; + m->text = text; + if (add234(macros, m) != m) + { + error(err_macroexists, &fpos, name); + sfree(name); + sfree(text); + } +} +static int +macrolookup(tree234 * macros, input * in, wchar_t * name, filepos * pos) +{ + macro m, *gotit; + m.name = name; + gotit = find234(macros, &m, NULL); + if (gotit) + { + macrostack *expansion = mknew(macrostack); + expansion->next = in->stack; + expansion->text = gotit->text; + expansion->pos = *pos; /* structure copy */ + expansion->ptr = 0; + expansion->npushback = in->npushback; + in->stack = expansion; + return TRUE; + } else + return FALSE; +} +static void macrocleanup(tree234 * macros) +{ + int ti; + macro *m; + for (ti = 0; (m = (macro *) index234(macros, ti)) != NULL; ti++) + { + sfree(m->name); + sfree(m->text); + sfree(m); + } + 
freetree234(macros); +} + +/* + * Can return EOF + */ +static int get(input * in, filepos * pos) +{ + int pushbackpt = in->stack ? in->stack->npushback : 0; + if (in->npushback > pushbackpt) + { + --in->npushback; + if (pos) + *pos = in->pushback[in->npushback].pos; /* structure copy */ + return in->pushback[in->npushback].chr; + } else if (in->stack) + { + wchar_t c = in->stack->text[in->stack->ptr]; + if (in->stack->text[++in->stack->ptr] == L'\0') + { + macrostack *tmp = in->stack; + in->stack = tmp->next; + sfree(tmp); + } + return c; + } else if (in->currfp) + { + int c = getc(in->currfp); + + if (c == EOF) + { + fclose(in->currfp); + in->currfp = NULL; + } + /* Track line numbers, for error reporting */ + if (pos) + *pos = in->pos; + if (in->reportcols) + { + switch (c) + { + case '\t': + in->pos.col = 1 + (in->pos.col + TAB_STOP - 1) % TAB_STOP; + break; + case '\n': + in->pos.col = 1; + in->pos.line++; + break; + default: + in->pos.col++; + break; + } + } else + { + in->pos.col = -1; + if (c == '\n') + in->pos.line++; + } + /* FIXME: do input charmap translation. We should be returning + * Unicode here. */ + return c; + } else + return EOF; +} + +/* + * Lexical analysis of source files. + */ +typedef struct token_Tag token; +struct token_Tag { + int type; + int cmd, aux; + wchar_t *text; + filepos pos; +}; +enum { + tok_eof, /* end of file */ + tok_eop, /* end of paragraph */ + tok_white, /* whitespace */ + tok_word, /* a word or word fragment */ + tok_cmd, /* \command */ + tok_lbrace, /* { */ + tok_rbrace /* } */ +}; + +/* Halibut command keywords. 
*/ +enum { + c__invalid, /* invalid command */ + c__comment, /* comment command (\#) */ + c__escaped, /* escaped character */ + c__nbsp, /* nonbreaking space */ + c_A, /* appendix heading */ + c_B, /* bibliography entry */ + c_BR, /* bibliography rewrite */ + c_C, /* chapter heading */ + c_H, /* heading */ + c_I, /* invisible index mark */ + c_IM, /* index merge/rewrite */ + c_K, /* capitalised cross-reference */ + c_S, /* aux field is 0, 1, 2, ... */ + c_U, /* unnumbered-chapter heading */ + c_W, /* Web hyperlink */ + c_L, /* Relative/local hyperlink */ + c_b, /* bulletted list */ + c_c, /* code */ + c_cfg, /* configuration directive */ + c_copyright, /* copyright statement */ + c_cw, /* weak code */ + c_date, /* document processing date */ + c_define, /* macro definition */ + c_e, /* emphasis */ + c_i, /* visible index mark */ + c_ii, /* uncapitalised visible index mark */ + c_k, /* uncapitalised cross-reference */ + c_R, /* free text cross-reference */ + c_n, /* numbered list */ + c_nocite, /* bibliography trickery */ + c_preamble, /* document preamble text */ + c_q, /* quote marks */ + c_rule, /* horizontal rule */ + c_title, /* document title */ + c_u, /* aux field is char code */ + c_versionid /* document RCS id */ +}; + +/* Perhaps whitespace should be defined in a more Unicode-friendly way? */ +#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 ) +#define isnl(c) ( (c)==10 ) +#define isdec(c) ( ((c)>='0'&&(c)<='9') ) +#define fromdec(c) ( (c)-'0' ) +#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f')) +#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) ) +#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z')) + +/* + * Keyword comparison function. Like strcmp, but between a wchar_t * + * and a char *. + */ +static int kwcmp(wchar_t const *p, char const *q) +{ + int i; + do + { + i = *p - *q; + } + while (*p++ && *q++ && !i); + return i; +} + +/* + * Match a keyword. 
+ */ +static void match_kw(token * tok) +{ + /* + * FIXME. The ids are explicit in here so as to allow long-name + * equivalents to the various very short keywords. + * + * This list must be sorted, it's searched using binary search. + */ + static const struct { + char const *name; + int id; + } keywords[] = { + { + "#", c__comment} + , /* comment command (\#) */ + { + "-", c__escaped} + , /* nonbreaking hyphen */ + { + "A", c_A} + , /* appendix heading */ + { + "B", c_B} + , /* bibliography entry */ + { + "BR", c_BR} + , /* bibliography rewrite */ + { + "C", c_C} + , /* chapter heading */ + { + "H", c_H} + , /* heading */ + { + "I", c_I} + , /* invisible index mark */ + { + "IM", c_IM} + , /* index merge/rewrite */ + { + "K", c_K} + , /* capitalised cross-reference */ + { + "L", c_L} + , /* Relative/local hyperlink */ + { + "R", c_R} + , /* free text cross-reference */ + { + "U", c_U} + , /* unnumbered-chapter heading */ + { + "W", c_W} + , /* Web hyperlink */ + { + "\\", c__escaped} + , /* escaped backslash (\\) */ + { + "_", c__nbsp} + , /* nonbreaking space (\_) */ + { + "b", c_b} + , /* bulletted list */ + { + "c", c_c} + , /* code */ + { + "cfg", c_cfg} + , /* configuration directive */ + { + "copyright", c_copyright} + , /* copyright statement */ + { + "cw", c_cw} + , /* weak code */ + { + "date", c_date} + , /* document processing date */ + { + "define", c_define} + , /* macro definition */ + { + "e", c_e} + , /* emphasis */ + { + "i", c_i} + , /* visible index mark */ + { + "ii", c_ii} + , /* uncapitalised visible index mark */ + { + "k", c_k} + , /* uncapitalised cross-reference */ + { + "n", c_n} + , /* numbered list */ + { + "nocite", c_nocite} + , /* bibliography trickery */ + { + "preamble", c_preamble} + , /* document preamble text */ + { + "q", c_q} + , /* quote marks */ + { + "rule", c_rule} + , /* horizontal rule */ + { + "title", c_title} + , /* document title */ + { + "versionid", c_versionid} + , /* document RCS id */ + { + "{", c__escaped} + , 
/* escaped lbrace (\{) */ + { + "}", c__escaped} + , /* escaped rbrace (\}) */ + }; + int i, j, k, c; + + /* + * Special cases: \S{0,1,2,...} and \uABCD. If the syntax + * doesn't match correctly, we just fall through to the + * binary-search phase. + */ + if (tok->text[0] == 'S') + { + /* We expect numeric characters thereafter. */ + wchar_t *p = tok->text + 1; + int n; + if (!*p) + n = 1; + else + { + n = 0; + while (*p && isdec(*p)) + { + n = 10 * n + fromdec(*p); + p++; + } + } + if (!*p) + { + tok->cmd = c_S; + tok->aux = n; + return; + } + } else if (tok->text[0] == 'u') + { + /* We expect hex characters thereafter. */ + wchar_t *p = tok->text + 1; + int n = 0; + while (*p && ishex(*p)) + { + n = 16 * n + fromhex(*p); + p++; + } + if (!*p) + { + tok->cmd = c_u; + tok->aux = n; + return; + } + } + + i = -1; + j = sizeof(keywords) / sizeof(*keywords); + while (j - i > 1) + { + k = (i + j) / 2; + c = kwcmp(tok->text, keywords[k].name); + if (c < 0) + j = k; + else if (c > 0) + i = k; + else + { /* c == 0 */ + + tok->cmd = keywords[k].id; + return; + } + } + + tok->cmd = c__invalid; +} + + +/* + * Read a token from the input file, in the normal way (`normal' in + * the sense that code paragraphs work a different way). + */ +token get_token(input * in) +{ + int c; + int nls; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.cmd = c__invalid; + ret.aux = FALSE; + ret.text = NULL; /* default */ + c = get(in, &cpos); + ret.pos = cpos; + if (iswhite(c)) + { /* tok_white or tok_eop */ + nls = 0; + do + { + if (isnl(c)) + nls++; + } + while ((c = get(in, &cpos)) != EOF && iswhite(c)); + if (c == EOF) + { + ret.type = tok_eof; + return ret; + } + unget(in, c, &cpos); + ret.type = (nls > 1 ? 
tok_eop : tok_white); + return ret; + } else if (c == EOF) + { /* tok_eof */ + ret.type = tok_eof; + return ret; + } else if (c == '\\') + { /* tok_cmd */ + c = get(in, &cpos); + if (c == '-' || c == '\\' || c == '_' || + c == '#' || c == '{' || c == '}') + { + /* single-char command */ + rdadd(&rs, (wchar_t)c); + } else if (c == 'u') + { + int len = 0; + do + { + rdadd(&rs, (wchar_t)c); + len++; + c = get(in, &cpos); + } + while (ishex(c) && len < 5); + unget(in, c, &cpos); + } else if (iscmd(c)) + { + do + { + rdadd(&rs, (wchar_t)c); + c = get(in, &cpos); + } + while (iscmd(c)); + unget(in, c, &cpos); + } + /* + * Now match the command against the list of available + * ones. + */ + ret.type = tok_cmd; + ret.text = ustrdup(rs.text); + match_kw(&ret); + sfree(rs.text); + return ret; + } else if (c == '{') + { /* tok_lbrace */ + ret.type = tok_lbrace; + return ret; + } else if (c == '}') + { /* tok_rbrace */ + ret.type = tok_rbrace; + return ret; + } else + { /* tok_word */ + /* + * Read a word: the longest possible contiguous sequence of + * things other than whitespace, backslash, braces and + * hyphen. A hyphen terminates the word but is returned as + * part of it; everything else is pushed back for the next + * token. The `aux' field contains TRUE if the word ends in + * a hyphen. + */ + ret.aux = FALSE; /* assumed for now */ + while (1) + { + if (iswhite(c) || c == '{' || c == '}' || c == '\\' || c == EOF) + { + /* Put back the character that caused termination */ + unget(in, c, &cpos); + break; + } else + { + rdadd(&rs, (wchar_t)c); + if (c == '-') + { + ret.aux = TRUE; + break; /* hyphen terminates word */ + } + } + c = get(in, &cpos); + } + ret.type = tok_word; + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; + } +} + +/* + * Determine whether the next input character is an open brace (for + * telling code paragraphs from paragraphs which merely start with + * code). 
+ */ +int isbrace(input * in) +{ + int c; + filepos cpos; + + c = get(in, &cpos); + unget(in, c, &cpos); + return (c == '{'); +} + +/* + * Read the rest of a line that starts `\c'. Including nothing at + * all (tok_word with empty text). + */ +token get_codepar_token(input * in) +{ + int c; + token ret; + rdstring rs = { 0, 0, NULL }; + filepos cpos; + + ret.type = tok_word; + c = get(in, &cpos); /* expect (and discard) one space */ + ret.pos = cpos; + if (c == ' ') + { + c = get(in, &cpos); + ret.pos = cpos; + } + while (!isnl(c) && c != EOF) + { + int c2 = c; + c = get(in, &cpos); + /* Discard \r just before \n. */ + if (c2 != 13 || !isnl(c)) + rdadd(&rs, (wchar_t)c2); + } + unget(in, c, &cpos); + ret.text = ustrdup(rs.text); + sfree(rs.text); + return ret; +} + +/* + * Adds a new word to a linked list + */ +static word *addword(word newword, word *** hptrptr) +{ + word *mnewword; + if (!hptrptr) + return NULL; + mnewword = mknew(word); + *mnewword = newword; /* structure copy */ + mnewword->next = NULL; + **hptrptr = mnewword; + *hptrptr = &mnewword->next; + return mnewword; +} + +/* + * Adds a new paragraph to a linked list + */ +static paragraph *addpara(paragraph newpara, paragraph *** hptrptr) +{ + paragraph *mnewpara = mknew(paragraph); + *mnewpara = newpara; /* structure copy */ + mnewpara->next = NULL; + **hptrptr = mnewpara; + *hptrptr = &mnewpara->next; + return mnewpara; +} + +/* + * Destructor before token is reassigned; should catch most memory + * leaks + */ +#define dtor(t) ( sfree(t.text) ) + +/* + * Reads a single file (ie until get() returns EOF) + */ +static void read_file(paragraph *** ret, input * in, indexdata * idx) +{ + token t; + paragraph par; + word wd, **whptr, **idximplicit; + tree234 *macros; + wchar_t utext[2], *wdtext; + int style, spcstyle; + int already; + int iswhite, seenwhite; + int type; + struct stack_item { + enum { + stack_nop = 0, /* do nothing (for error recovery) */ + stack_ualt = 1, /* \u alternative */ + stack_style = 
2, /* \e, \c, \cw */ + stack_idx = 4, /* \I, \i, \ii */ + stack_hyper = 8, /* \W */ + stack_quote = 16, /* \q */ + } type; + word **whptr; /* to restore from \u alternatives */ + word **idximplicit; /* to restore from \u alternatives */ + } *sitem; + stack parsestk; + word *indexword=NULL, *uword=NULL, *iword=NULL; + word *idxwordlist; + rdstring indexstr; + int index_downcase=0, index_visible=0, indexing=0; + const rdstring nullrs = { 0, 0, NULL }; + wchar_t uchr; + + t.text = NULL; + macros = newtree234(macrocmp); + already = FALSE; + + /* + * Loop on each paragraph. + */ + while (1) + { + int start_cmd = c__invalid; + par.words = NULL; + par.keyword = NULL; + whptr = &par.words; + + /* + * Get a token. + */ + if (!already) + { + dtor(t), t = get_token(in); + } + already = FALSE; + if (t.type == tok_eof) + break; + + /* + * Parse code paragraphs separately. + */ + if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) + { + par.type = para_Code; + par.fpos = t.pos; + while (1) + { + dtor(t), t = get_codepar_token(in); + wd.type = word_WeakCode; + wd.breaks = FALSE; /* shouldn't need this... */ + wd.text = ustrdup(t.text); + wd.alt = NULL; + wd.fpos = t.pos; + addword(wd, &whptr); + dtor(t), t = get_token(in); + if (t.type == tok_white) + { + /* + * The newline after a code-paragraph line + */ + dtor(t), t = get_token(in); + } + if (t.type == tok_eop || t.type == tok_eof) + break; + else if (t.type != tok_cmd || t.cmd != c_c) + { + error(err_brokencodepara, &t.pos); + addpara(par, ret); + while (t.type != tok_eop) /* error recovery: */ + dtor(t), t = get_token(in); /* eat rest of paragraph */ + goto codeparabroken; /* ick, but such is life */ + } + } + addpara(par, ret); + codeparabroken: + continue; + } + + while (t.type == tok_cmd && macrolookup(macros, in, t.text, &t.pos)) + { + dtor(t), t = get_token(in); + } + + + /* + * This token begins a paragraph. See if it's one of the + * special commands that define a paragraph type. 
+ * + * (note that \# is special in a way, and \nocite takes no + * text) + */ + par.type = para_Normal; + if (t.type == tok_cmd) + { + int needkw=0; + int is_macro = FALSE; + + par.fpos = t.pos; + switch (t.cmd) + { + default: + needkw = -1; + break; + case c__invalid: + error(err_badparatype, t.text, &t.pos); + needkw = 4; + break; + case c__comment: + if (isbrace(in)) + break; /* `\#{': isn't a comment para */ + do + { + dtor(t), t = get_token(in); + } + while (t.type != tok_eop && t.type != tok_eof); + continue; /* next paragraph */ + /* + * `needkw' values: + * + * 1 -- exactly one keyword + * 2 -- at least one keyword + * 4 -- any number of keywords including zero + * 8 -- at least one keyword and then nothing else + * 16 -- nothing at all! no keywords, no body + * 32 -- no keywords at all + */ + case c_A: + needkw = 2; + par.type = para_Appendix; + break; + case c_B: + needkw = 2; + par.type = para_Biblio; + break; + case c_BR: + needkw = 1; + par.type = para_BR; + start_cmd = c_BR; + break; + case c_C: + needkw = 2; + par.type = para_Chapter; + break; + case c_H: + needkw = 2; + par.type = para_Heading; + par.aux = 0; + break; + case c_IM: + needkw = 2; + par.type = para_IM; + start_cmd = c_IM; + break; + case c_S: + needkw = 2; + par.type = para_Subsect; + par.aux = t.aux; + break; + case c_U: + needkw = 32; + par.type = para_UnnumberedChapter; + break; + /* For \b and \n the keyword is optional */ + case c_b: + needkw = 4; + par.type = para_Bullet; + break; + case c_n: + needkw = 4; + par.type = para_NumberedList; + break; + case c_cfg: + needkw = 8; + par.type = para_Config; + start_cmd = c_cfg; + break; + case c_copyright: + needkw = 32; + par.type = para_Copyright; + break; + case c_define: + is_macro = TRUE; + needkw = 1; + break; + /* For \nocite the keyword is _everything_ */ + case c_nocite: + needkw = 8; + par.type = para_NoCite; + break; + case c_preamble: + needkw = 32; + par.type = para_Preamble; + break; + case c_rule: + needkw = 16; + 
par.type = para_Rule; + break; + case c_title: + needkw = 32; + par.type = para_Title; + break; + case c_versionid: + needkw = 32; + par.type = para_VersionID; + break; + } + + if (needkw > 0) + { + rdstring rs = { 0, 0, NULL }; + int nkeys = 0; + filepos fp; + + /* Get keywords. */ + dtor(t), t = get_token(in); + fp = t.pos; + while (t.type == tok_lbrace) + { + /* This is a keyword. */ + nkeys++; + /* FIXME: there will be bugs if anyone specifies an + * empty keyword (\foo{}), so trap this case. */ + while (dtor(t), t = get_token(in), + t.type == tok_word || + t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp) || + (t.type == tok_cmd && t.cmd == c__escaped)) + { + if (t.type == tok_white || + (t.type == tok_cmd && t.cmd == c__nbsp)) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (t.type != tok_rbrace) + { + error(err_kwunclosed, &t.pos); + continue; + } + rdadd(&rs, 0); /* add string terminator */ + dtor(t), t = get_token(in); /* eat right brace */ + } + + rdadd(&rs, 0); /* add string terminator */ + + /* See whether we have the right number of keywords. */ + if ((needkw & 48) && nkeys > 0) + error(err_kwillegal, &fp); + if ((needkw & 11) && nkeys == 0) + error(err_kwexpected, &fp); + if ((needkw & 5) && nkeys > 1) + error(err_kwtoomany, &fp); + + if (is_macro) + { + /* + * Macro definition. Get the rest of the line + * as a code-paragraph token, repeatedly until + * there's nothing more left of it. Separate + * with newlines. 
+ */ + rdstring macrotext = { 0, 0, NULL }; + while (1) + { + dtor(t), t = get_codepar_token(in); + if (macrotext.pos > 0) + rdadd(¯otext, L'\n'); + rdadds(¯otext, t.text); + dtor(t), t = get_token(in); + if (t.type == tok_eop) + break; + } + macrodef(macros, rs.text, macrotext.text, fp); + continue; /* next paragraph */ + } + + par.keyword = rdtrim(&rs); + + /* Move to EOP in case of needkw==8 or 16 (no body) */ + if (needkw & 24) + { + /* We allow whitespace even when we expect no para body */ + while (t.type == tok_white) + dtor(t), t = get_token(in); + if (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + { + error(err_bodyillegal, &t.pos); + /* Error recovery: eat the rest of the paragraph */ + while (t.type != tok_eop && t.type != tok_eof && + (start_cmd == c__invalid || + t.type != tok_cmd || t.cmd != start_cmd)) + dtor(t), t = get_token(in); + } + if (t.type == tok_cmd) + already = TRUE; /* inhibit get_token at top of loop */ + addpara(par, ret); + continue; /* next paragraph */ + } + } + } + + /* + * Now read the actual paragraph, word by word, adding to + * the paragraph list. + * + * Mid-paragraph commands: + * + * \K \k + * \c \cw + * \e + * \i \ii + * \I + * \u + * \W + * \date + * \\ \{ \} + */ + parsestk = stk_new(); + style = word_Normal; + spcstyle = word_WhiteSpace; + indexing = FALSE; + seenwhite = TRUE; + while (t.type != tok_eop && t.type != tok_eof) + { + iswhite = FALSE; + already = FALSE; + + /* Handle implicit paragraph breaks after \IM, \BR etc */ + if (start_cmd != c__invalid && + t.type == tok_cmd && t.cmd == start_cmd) + { + already = TRUE; /* inhibit get_token at top of loop */ + break; + } + + if (t.type == tok_cmd && t.cmd == c__escaped) + { + t.type = tok_word; /* nice and simple */ + t.aux = 0; /* even if `\-' - nonbreaking! 
*/ + } + if (t.type == tok_cmd && t.cmd == c__nbsp) + { + t.type = tok_word; /* nice and simple */ + sfree(t.text); + t.text = ustrdup(L" "); /* text is ` ' not `_' */ + t.aux = 0; /* (nonbreaking) */ + } + switch (t.type) + { + case tok_white: + if (whptr == &par.words) + break; /* strip whitespace at start of para */ + wd.text = NULL; + wd.type = spcstyle; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + + /* + * Inhibit use of whitespace if it's (probably the + * newline) before a repeat \IM / \BR type + * directive. + */ + if (start_cmd != c__invalid) + { + dtor(t), t = get_token(in); + already = TRUE; + if (t.type == tok_cmd && t.cmd == start_cmd) + break; + } + + if (indexing) + rdadd(&indexstr, ' '); + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + iswhite = TRUE; + break; + case tok_word: + if (indexing) + rdadds(&indexstr, t.text); + wd.type = style; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = t.aux; + if (!indexing || index_visible) + { + wd.text = ustrdup(t.text); + addword(wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup(t.text); + addword(wd, &idximplicit); + } + break; + case tok_lbrace: + error(err_unexbrace, &t.pos); + /* Error recovery: push nop */ + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + break; + case tok_rbrace: + sitem = stk_pop(parsestk); + if (!sitem) + error(err_unexbrace, &t.pos); + else + { + if (sitem->type & stack_ualt) + { + whptr = sitem->whptr; + idximplicit = sitem->idximplicit; + } + if (sitem->type & stack_style) + { + style = word_Normal; + spcstyle = word_WhiteSpace; + } + if (sitem->type & stack_idx ) { + indexword->text = ustrdup(indexstr.text); + if (index_downcase) + ustrlow(indexword->text); + indexing = FALSE; + rdadd(&indexstr, L'\0'); + index_merge(idx, FALSE, indexstr.text, idxwordlist); + sfree(indexstr.text); + } + if (sitem->type & stack_hyper) + { + wd.text = 
NULL; + wd.type = word_HyperEnd; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); + } + if (sitem->type & stack_quote) + { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Close; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + } + } + sfree(sitem); + break; + case tok_cmd: + switch (t.cmd) + { + case c__comment: + /* + * In-paragraph comment: \#{ balanced braces } + * + * Anything goes here; even tok_eop. We should + * eat whitespace after the close brace _if_ + * there was whitespace before the \#. + */ + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + int braces = 1; + while (braces > 0) + { + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) + braces++; + else if (t.type == tok_rbrace) + braces--; + else if (t.type == tok_eof) + { + error(err_commenteof, &t.pos); + break; + } + } + } + if (seenwhite) + { + already = TRUE; + dtor(t), t = get_token(in); + if (t.type == tok_white) + { + iswhite = TRUE; + already = FALSE; + } + } + break; + case c_q: + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + wd.text = NULL; + wd.type = toquotestyle(style); + wd.alt = NULL; + wd.aux = quote_Open; + wd.fpos = t.pos; + wd.breaks = FALSE; + if (!indexing || index_visible) + addword(wd, &whptr); + if (indexing) + { + rdadd(&indexstr, L'"'); + addword(wd, &idximplicit); + } + sitem = mknew(struct stack_item); + sitem->type = stack_quote; + stk_push(parsestk, sitem); + } + break; + case c_K: + case c_k: + case c_R: + case c_W: + case c_L: + case c_date: + /* + * Keyword, hyperlink, or \date. We expect a + * left brace, some text, and then a right + * brace. 
No nesting; no arguments. + */ + wd.fpos = t.pos; + wd.breaks = FALSE; + if (t.cmd == c_K) + wd.type = word_UpperXref; + else if (t.cmd == c_k) + wd.type = word_LowerXref; + else if (t.cmd == c_R) + wd.type = word_FreeTextXref; + else if (t.cmd == c_W) + wd.type = word_HyperLink; + else if (t.cmd == c_L) + wd.type = word_LocalHyperLink; + else + wd.type = word_Normal; + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + if (wd.type == word_Normal) + { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + already = TRUE; + wdtext = ustrftime(NULL, broken); + wd.type = style; + } else + { + error(err_explbr, &t.pos); + wdtext = NULL; + } + } else + { + rdstring rs = { 0, 0, NULL }; + while (dtor(t), t = get_token(in), + t.type == tok_word || t.type == tok_white) + { + if (t.type == tok_white) + rdadd(&rs, ' '); + else + rdadds(&rs, t.text); + } + if (wd.type == word_Normal) + { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + wdtext = ustrftime(rs.text, broken); + wd.type = style; + } else + { + wdtext = ustrdup(rs.text); + } + sfree(rs.text); + if (t.type != tok_rbrace) + { + error(err_kwexprbr, &t.pos); + } + } + wd.alt = NULL; + wd.aux = 0; + if (!indexing || index_visible) + { + wd.text = ustrdup(wdtext); + addword(wd, &whptr); + } + if (indexing) + { + wd.text = ustrdup(wdtext); + addword(wd, &idximplicit); + } + sfree(wdtext); + if (wd.type == word_FreeTextXref || wd.type == word_HyperLink || wd.type == word_LocalHyperLink) + { + /* + * Hyperlinks are different: they then + * expect another left brace, to begin + * delimiting the text marked by the link. + */ + dtor(t), t = get_token(in); + /* + * Special cases: \W{}\c, \W{}\e, \W{}\cw + */ + sitem = mknew(struct stack_item); + sitem->type = stack_hyper; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? 
word_Code : + t.cmd == c_cw ? word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + sfree(sitem); + } else + { + stk_push(parsestk, sitem); + } + } + break; + case c_c: + case c_cw: + case c_e: + type = t.cmd; + if (style != word_Normal) + { + error(err_nestedstyles, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + dtor(t), t = get_token(in); + if (t.type != tok_lbrace) + { + error(err_explbr, &t.pos); + } else + { + style = (type == c_c ? word_Code : + type == c_cw ? word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem = mknew(struct stack_item); + sitem->type = stack_style; + stk_push(parsestk, sitem); + } + break; + case c_i: + case c_ii: + case c_I: + type = t.cmd; + if (indexing) + { + error(err_nestedindex, &t.pos); + /* Error recovery: eat lbrace, push nop. */ + dtor(t), t = get_token(in); + sitem = mknew(struct stack_item); + sitem->type = stack_nop; + stk_push(parsestk, sitem); + } + sitem = mknew(struct stack_item); + sitem->type = stack_idx; + dtor(t), t = get_token(in); + /* + * Special cases: \i\c, \i\e, \i\cw + */ + wd.fpos = t.pos; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) + { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else + { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? 
word_WeakCode : word_Emph); + spcstyle = tospacestyle(style); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) + { + sfree(sitem); + error(err_explbr, &t.pos); + } else + { + /* Add an index-reference word with no text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + wd.aux = 0; + wd.breaks = FALSE; + indexword = addword(wd, &whptr); + /* Set up a rdstring to read the index text */ + indexstr = nullrs; + /* Flags so that we do the Right Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; + /* Stack item to close the indexing on exit */ + stk_push(parsestk, sitem); + } + break; + case c_u: + uchr = t.aux; + utext[0] = uchr; + utext[1] = 0; + wd.type = style; + wd.breaks = FALSE; + wd.alt = NULL; + wd.aux = 0; + wd.fpos = t.pos; + if (!indexing || index_visible) + { + wd.text = ustrdup(utext); + uword = addword(wd, &whptr); + } else + uword = NULL; + if (indexing) + { + wd.text = ustrdup(utext); + iword = addword(wd, &idximplicit); + } else + iword = NULL; + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) + { + /* + * \u with a left brace. Until the brace + * closes, all further words go on a + * sidetrack from the main thread of the + * paragraph. + */ + sitem = mknew(struct stack_item); + sitem->type = stack_ualt; + sitem->whptr = whptr; + sitem->idximplicit = idximplicit; + stk_push(parsestk, sitem); + whptr = uword ? &uword->alt : NULL; + idximplicit = iword ? 
&iword->alt : NULL; + } else + { + if (indexing) + rdadd(&indexstr, uchr); + already = TRUE; + } + break; + default: + if (!macrolookup(macros, in, t.text, &t.pos)) + error(err_badmidcmd, t.text, &t.pos); + break; + } + } + if (!already) + dtor(t), t = get_token(in); + seenwhite = iswhite; + } + /* Check the stack is empty */ + if (NULL != (sitem = stk_pop(parsestk))) + { + do + { + sfree(sitem); + sitem = stk_pop(parsestk); + } + while (sitem); + error(err_missingrbrace, &t.pos); + } + stk_free(parsestk); + addpara(par, ret); + } + + /* + * We break to here rather than returning, because otherwise + * this cleanup doesn't happen. + */ + dtor(t); + macrocleanup(macros); +} + +paragraph *read_input(input * in, indexdata * idx) +{ + paragraph *head = NULL; + paragraph **hptr = &head; + + while (in->currindex < in->nfiles) + { + in->currfp = fopen(in->filenames[in->currindex], "r"); + if (in->currfp) + { + setpos(in, in->filenames[in->currindex]); + read_file(&hptr, in, idx); + } + in->currindex++; + } + + return head; +} diff --git a/Docs/src/bin/halibut/keywords.c b/Docs/src/bin/halibut/keywords.c index 0c9ba17..cb7c59e 100755 --- a/Docs/src/bin/halibut/keywords.c +++ b/Docs/src/bin/halibut/keywords.c @@ -1,171 +1,171 @@ -/*
- * keywords.c: keep track of all cross-reference keywords
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include "halibut.h"
-
-static int kwcmp(void *av, void *bv)
-{
- const keyword *a = (const keyword *) av;
- const keyword *b = (const keyword *) bv;
- return ustrcmp(a->key, b->key);
-}
-
-static int kwfind(void *av, void *bv)
-{
- wchar_t *a = (wchar_t *) av;
- const keyword *b = (const keyword *) bv;
- return ustrcmp(a, b->key);
-}
-
-keyword *kw_lookup(keywordlist * kl, wchar_t * str)
-{
- return find234(kl->keys, str, kwfind);
-}
-
-/*
- * This function reads through source form and collects the
- * keywords. They get collected in a heap, sorted by Unicode
- * collation, last at the top (so that we can Heapsort them when we
- * finish).
- */
-keywordlist *get_keywords(paragraph * source)
-{
- int errors = FALSE;
- keywordlist *kl = mknew(keywordlist);
- numberstate *n = number_init();
- int prevpara = para_NotParaType;
-
- number_cfg(n, source);
-
- kl->size = 0;
- kl->keys = newtree234(kwcmp);
- kl->nlooseends = kl->looseendssize = 0;
- kl->looseends = NULL;
- for (; source; source = source->next)
- {
- wchar_t *p, *q;
- p = q = source->keyword;
-
- /*
- * Look for the section type override (`example',
- * `question' or whatever - to replace `chapter' or
- * `section' on a per-section basis).
- */
- if (q)
- {
- q = uadv(q); /* point q at the word beyond */
- if (!*q)
- q = NULL;
- }
-
- /*
- * Number the chapter / section / list-item / whatever.
- * This also sets up the `parent', `child' and `sibling'
- * links.
- */
- source->kwtext = number_mktext(n, source, q, prevpara, &errors);
- prevpara = source->type;
-
- if (p && *p)
- {
- if (source->kwtext || source->type == para_Biblio)
- {
- keyword *kw, *ret;
-
- kw = mknew(keyword);
- kw->key = p;
- kw->text = source->kwtext;
- kw->para = source;
- ret = add234(kl->keys, kw);
- if (ret != kw)
- {
- error(err_multikw, &source->fpos, &ret->para->fpos, p);
- sfree(kw);
- /* FIXME: what happens to kw->text? Does it leak? */
- }
- }
- } else
- {
- if (kl->nlooseends >= kl->looseendssize)
- {
- kl->looseendssize = kl->nlooseends + 32;
- kl->looseends = resize(kl->looseends, kl->looseendssize);
- }
- kl->looseends[kl->nlooseends++] = source->kwtext;
- }
- }
-
- number_free(n);
-
- if (errors)
- {
- free_keywords(kl);
- return NULL;
- }
-
- return kl;
-}
-
-void free_keywords(keywordlist * kl)
-{
- keyword *kw;
- while (kl->nlooseends)
- free_word_list(kl->looseends[--kl->nlooseends]);
- sfree(kl->looseends);
- while ((kw = index234(kl->keys, 0)) != NULL)
- {
- delpos234(kl->keys, 0);
- free_word_list(kw->text);
- sfree(kw);
- }
- freetree234(kl->keys);
- sfree(kl);
-}
-
-void subst_keywords(paragraph * source, keywordlist * kl)
-{
- for (; source; source = source->next)
- {
- word *ptr;
- for (ptr = source->words; ptr; ptr = ptr->next)
- {
- if (ptr->type == word_UpperXref || ptr->type == word_LowerXref)
- {
- keyword *kw;
- word **endptr, *close, *subst;
-
- kw = kw_lookup(kl, ptr->text);
- if (!kw)
- {
- error(err_nosuchkw, &ptr->fpos, ptr->text);
- subst = NULL;
- } else
- subst = dup_word_list(kw->text);
-
- if (subst && ptr->type == word_LowerXref &&
- kw->para->type != para_Biblio &&
- kw->para->type != para_BiblioCited)
- ustrlow(subst->text);
-
- close = mknew(word);
- close->text = NULL;
- close->alt = NULL;
- close->type = word_XrefEnd;
- close->fpos = ptr->fpos;
-
- close->next = ptr->next;
- ptr->next = subst;
-
- for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next)
- (*endptr)->fpos = ptr->fpos;
-
- *endptr = close;
- ptr = close;
- }
- }
- }
-}
+/* + * keywords.c: keep track of all cross-reference keywords + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "halibut.h" + +static int kwcmp(void *av, void *bv) +{ + const keyword *a = (const keyword *) av; + const keyword *b = (const keyword *) bv; + return ustrcmp(a->key, b->key); +} + +static int kwfind(void *av, void *bv) +{ + wchar_t *a = (wchar_t *) av; + const keyword *b = (const keyword *) bv; + return ustrcmp(a, b->key); +} + +keyword *kw_lookup(keywordlist * kl, wchar_t * str) +{ + return find234(kl->keys, str, kwfind); +} + +/* + * This function reads through source form and collects the + * keywords. They get collected in a heap, sorted by Unicode + * collation, last at the top (so that we can Heapsort them when we + * finish). + */ +keywordlist *get_keywords(paragraph * source) +{ + int errors = FALSE; + keywordlist *kl = mknew(keywordlist); + numberstate *n = number_init(); + int prevpara = para_NotParaType; + + number_cfg(n, source); + + kl->size = 0; + kl->keys = newtree234(kwcmp); + kl->nlooseends = kl->looseendssize = 0; + kl->looseends = NULL; + for (; source; source = source->next) + { + wchar_t *p, *q; + p = q = source->keyword; + + /* + * Look for the section type override (`example', + * `question' or whatever - to replace `chapter' or + * `section' on a per-section basis). + */ + if (q) + { + q = uadv(q); /* point q at the word beyond */ + if (!*q) + q = NULL; + } + + /* + * Number the chapter / section / list-item / whatever. + * This also sets up the `parent', `child' and `sibling' + * links. 
+ */ + source->kwtext = number_mktext(n, source, q, prevpara, &errors); + prevpara = source->type; + + if (p && *p) + { + if (source->kwtext || source->type == para_Biblio) + { + keyword *kw, *ret; + + kw = mknew(keyword); + kw->key = p; + kw->text = source->kwtext; + kw->para = source; + ret = add234(kl->keys, kw); + if (ret != kw) + { + error(err_multikw, &source->fpos, &ret->para->fpos, p); + sfree(kw); + /* FIXME: what happens to kw->text? Does it leak? */ + } + } + } else + { + if (kl->nlooseends >= kl->looseendssize) + { + kl->looseendssize = kl->nlooseends + 32; + kl->looseends = resize(kl->looseends, kl->looseendssize); + } + kl->looseends[kl->nlooseends++] = source->kwtext; + } + } + + number_free(n); + + if (errors) + { + free_keywords(kl); + return NULL; + } + + return kl; +} + +void free_keywords(keywordlist * kl) +{ + keyword *kw; + while (kl->nlooseends) + free_word_list(kl->looseends[--kl->nlooseends]); + sfree(kl->looseends); + while ((kw = index234(kl->keys, 0)) != NULL) + { + delpos234(kl->keys, 0); + free_word_list(kw->text); + sfree(kw); + } + freetree234(kl->keys); + sfree(kl); +} + +void subst_keywords(paragraph * source, keywordlist * kl) +{ + for (; source; source = source->next) + { + word *ptr; + for (ptr = source->words; ptr; ptr = ptr->next) + { + if (ptr->type == word_UpperXref || ptr->type == word_LowerXref) + { + keyword *kw; + word **endptr, *close, *subst; + + kw = kw_lookup(kl, ptr->text); + if (!kw) + { + error(err_nosuchkw, &ptr->fpos, ptr->text); + subst = NULL; + } else + subst = dup_word_list(kw->text); + + if (subst && ptr->type == word_LowerXref && + kw->para->type != para_Biblio && + kw->para->type != para_BiblioCited) + ustrlow(subst->text); + + close = mknew(word); + close->text = NULL; + close->alt = NULL; + close->type = word_XrefEnd; + close->fpos = ptr->fpos; + + close->next = ptr->next; + ptr->next = subst; + + for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next) + (*endptr)->fpos = ptr->fpos; + + *endptr = 
close; + ptr = close; + } + } + } +} diff --git a/Docs/src/bin/halibut/licence.c b/Docs/src/bin/halibut/licence.c index b48e0f1..4dd137b 100755 --- a/Docs/src/bin/halibut/licence.c +++ b/Docs/src/bin/halibut/licence.c @@ -1,17 +1,17 @@ -/*
- * licence.c: licence text
- */
-
-#include <stdio.h>
-
/* Lines of the licence text; the list ends at the NULL entry. */
static char *licencetext[] = {
    "FIXME: licence text goes here",
    NULL
};

/* Write each licence line to standard output with puts(). */
void licence(void)
{
    int i = 0;

    while (licencetext[i] != NULL) {
        puts(licencetext[i]);
        i++;
    }
}
+/* + * licence.c: licence text + */ + +#include <stdio.h> + +static char *licencetext[] = { + "FIXME: licence text goes here", + NULL +}; + +void licence(void) +{ + char **p; + for (p = licencetext; *p; p++) + puts(*p); +} diff --git a/Docs/src/bin/halibut/main.c b/Docs/src/bin/halibut/main.c index 18c136c..f82414f 100755 --- a/Docs/src/bin/halibut/main.c +++ b/Docs/src/bin/halibut/main.c @@ -1,321 +1,321 @@ -/*
- * main.c: command line parsing and top level
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "halibut.h"
-
-static void dbg_prtsource(paragraph * sourceform);
-static void dbg_prtwordlist(int level, word * w);
-static void dbg_prtkws(keywordlist * kws);
-
/*
 * Program entry point: parse the command line, read all input files
 * into source form, resolve keywords, citations and index entries,
 * then run the XHTML backend.
 *
 * Exits with EXIT_SUCCESS after printing usage (no arguments) or after
 * a version/licence request; exits with EXIT_FAILURE on an option
 * error, when no input file is named, when read_input() returns
 * nothing, or when get_keywords() fails.
 */
int main(int argc, char **argv)
{
    char **infiles;
    char *outfile;
    int nfiles;
    int nogo;
    int errs;
    int reportcols;
    int debug;

    /*
     * Set up initial (default) parameters.
     */
    /* argc is an upper bound on the number of non-option arguments */
    infiles = mknewa(char *, argc);
    /* NOTE(review): outfile is assigned below but never read in this function */
    outfile = NULL;
    nfiles = 0;
    nogo = errs = FALSE;
    reportcols = 0;
    debug = 0;

    if (argc == 1)
    {
        usage();
        exit(EXIT_SUCCESS);
    }

    /*
     * Parse command line arguments.
     */
    while (--argc)
    {
        char *p = *++argv;
        if (*p == '-')
        {
            /*
             * An option. Single-letter options may be bundled; the
             * loop stops at the end of the argument or when a case
             * sets p to NULL.
             */
            while (p && *++p)
            {
                char c = *p;
                switch (c)
                {
                case '-':
                    /*
                     * Long option.
                     */
                    {
                        char *opt, *val;
                        opt = p++;      /* opt will have _one_ leading - */
                        while (*p && *p != '=')
                            p++;        /* find end of option */
                        if (*p == '=')
                        {
                            /* split "name=value" in place */
                            *p++ = '\0';
                            val = p;
                        } else
                            val = NULL;
                        if (!strcmp(opt, "-version"))
                        {
                            showversion();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-licence") ||
                                   !strcmp(opt, "-license"))
                        {
                            licence();
                            nogo = TRUE;
                        } else if (!strcmp(opt, "-output"))
                        {
                            if (!val)
                                errs = TRUE, error(err_optnoarg, opt);
                            else
                                outfile = val;
                        } else if (!strcmp(opt, "-precise"))
                        {
                            reportcols = 1;
                        } else
                        {
                            errs = TRUE, error(err_nosuchopt, opt);
                        }
                    }
                    p = NULL;   /* a long option uses up the whole argument */
                    break;
                case 'V':
                case 'L':
                case 'P':
                case 'd':
                    /*
                     * Option requiring no parameter.
                     */
                    switch (c)
                    {
                    case 'V':
                        showversion();
                        nogo = TRUE;
                        break;
                    case 'L':
                        licence();
                        nogo = TRUE;
                        break;
                    case 'P':
                        reportcols = 1;
                        break;
                    case 'd':
                        debug = TRUE;
                        break;
                    }
                    break;
                case 'o':
                    /*
                     * Option requiring parameter. The parameter is
                     * the rest of this argument, or the next
                     * argument when this one ends here.
                     */
                    p++;
                    if (!*p && argc > 1)
                        --argc, p = *++argv;
                    else if (!*p)
                    {
                        char opt[2];
                        opt[0] = c;
                        opt[1] = '\0';
                        errs = TRUE, error(err_optnoarg, opt);
                    }
                    /*
                     * Now c is the option and p is the parameter.
                     */
                    switch (c)
                    {
                    case 'o':
                        outfile = p;
                        break;
                    }
                    p = NULL;   /* prevent continued processing */
                    break;
                default:
                    /*
                     * Unrecognised option.
                     */
                    {
                        char opt[2];
                        opt[0] = c;
                        opt[1] = '\0';
                        errs = TRUE, error(err_nosuchopt, opt);
                    }
                }
            }
        } else
        {
            /*
             * A non-option argument.
             */
            infiles[nfiles++] = p;
        }
    }

    /* Option errors take precedence over a version/licence request. */
    if (errs)
        exit(EXIT_FAILURE);
    if (nogo)
        exit(EXIT_SUCCESS);

    /*
     * Do the work.
     */
    if (nfiles == 0)
    {
        error(err_noinput);
        usage();
        exit(EXIT_FAILURE);
    }

    {
        input in;
        paragraph *sourceform, *p;
        indexdata *idx;
        keywordlist *keywords;

        in.filenames = infiles;
        in.nfiles = nfiles;
        in.currfp = NULL;
        in.currindex = 0;
        in.npushback = in.pushbacksize = 0;
        in.pushback = NULL;
        in.reportcols = reportcols;
        in.stack = NULL;

        idx = make_index();

        sourceform = read_input(&in, idx);
        if (!sourceform)
            exit(EXIT_FAILURE);

        sfree(in.pushback);

        mark_attr_ends(sourceform);

        sfree(infiles);

        keywords = get_keywords(sourceform);
        if (!keywords)
            exit(EXIT_FAILURE);
        gen_citations(sourceform, keywords);
        subst_keywords(sourceform, keywords);

        /* Feed every para_IM paragraph to the index. */
        for (p = sourceform; p; p = p->next)
            if (p->type == para_IM)
                index_merge(idx, TRUE, p->keyword, p->words);

        build_index(idx);


        if (debug)
        {
            index_debug(idx);
            dbg_prtkws(keywords);
            dbg_prtsource(sourceform);
        }

        xhtml_backend(sourceform, keywords, idx);

        free_para_list(sourceform);
        free_keywords(keywords);
        cleanup_index(idx);
    }

    return 0;
}
-
-static void dbg_prtsource(paragraph * sourceform)
-{
- /*
- * Output source form in debugging format.
- */
-
- paragraph *p;
- for (p = sourceform; p; p = p->next)
- {
- wchar_t *wp;
- printf("para %d ", p->type);
- if (p->keyword)
- {
- wp = p->keyword;
- while (*wp)
- {
- putchar('\"');
- for (; *wp; wp++)
- putchar(*wp);
- putchar('\"');
- if (*++wp)
- printf(", ");
- }
- } else
- printf("(no keyword)");
- printf(" {\n");
- dbg_prtwordlist(1, p->words);
- printf("}\n");
- }
-}
-
-static void dbg_prtkws(keywordlist * kws)
-{
- /*
- * Output keywords in debugging format.
- */
-
- int i;
- keyword *kw;
-
- for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++)
- {
- wchar_t *wp;
- printf("keyword ");
- wp = kw->key;
- while (*wp)
- {
- putchar('\"');
- for (; *wp; wp++)
- putchar(*wp);
- putchar('\"');
- if (*++wp)
- printf(", ");
- }
- printf(" {\n");
- dbg_prtwordlist(1, kw->text);
- printf("}\n");
- }
-}
-
-static void dbg_prtwordlist(int level, word * w)
-{
- for (; w; w = w->next)
- {
- wchar_t *wp;
- printf("%*sword %d ", level * 4, "", w->type);
- if (w->text)
- {
- printf("\"");
- for (wp = w->text; *wp; wp++)
- putchar(*wp);
- printf("\"");
- } else
- printf("(no text)");
- if (w->alt)
- {
- printf(" alt = {\n");
- dbg_prtwordlist(level + 1, w->alt);
- printf("%*s}", level * 4, "");
- }
- printf("\n");
- }
-}
+/* + * main.c: command line parsing and top level + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "halibut.h" + +static void dbg_prtsource(paragraph * sourceform); +static void dbg_prtwordlist(int level, word * w); +static void dbg_prtkws(keywordlist * kws); + +int main(int argc, char **argv) +{ + char **infiles; + char *outfile; + int nfiles; + int nogo; + int errs; + int reportcols; + int debug; + + /* + * Set up initial (default) parameters. + */ + infiles = mknewa(char *, argc); + outfile = NULL; + nfiles = 0; + nogo = errs = FALSE; + reportcols = 0; + debug = 0; + + if (argc == 1) + { + usage(); + exit(EXIT_SUCCESS); + } + + /* + * Parse command line arguments. + */ + while (--argc) + { + char *p = *++argv; + if (*p == '-') + { + /* + * An option. + */ + while (p && *++p) + { + char c = *p; + switch (c) + { + case '-': + /* + * Long option. + */ + { + char *opt, *val; + opt = p++; /* opt will have _one_ leading - */ + while (*p && *p != '=') + p++; /* find end of option */ + if (*p == '=') + { + *p++ = '\0'; + val = p; + } else + val = NULL; + if (!strcmp(opt, "-version")) + { + showversion(); + nogo = TRUE; + } else if (!strcmp(opt, "-licence") || + !strcmp(opt, "-license")) + { + licence(); + nogo = TRUE; + } else if (!strcmp(opt, "-output")) + { + if (!val) + errs = TRUE, error(err_optnoarg, opt); + else + outfile = val; + } else if (!strcmp(opt, "-precise")) + { + reportcols = 1; + } else + { + errs = TRUE, error(err_nosuchopt, opt); + } + } + p = NULL; + break; + case 'V': + case 'L': + case 'P': + case 'd': + /* + * Option requiring no parameter. + */ + switch (c) + { + case 'V': + showversion(); + nogo = TRUE; + break; + case 'L': + licence(); + nogo = TRUE; + break; + case 'P': + reportcols = 1; + break; + case 'd': + debug = TRUE; + break; + } + break; + case 'o': + /* + * Option requiring parameter. 
+ */ + p++; + if (!*p && argc > 1) + --argc, p = *++argv; + else if (!*p) + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_optnoarg, opt); + } + /* + * Now c is the option and p is the parameter. + */ + switch (c) + { + case 'o': + outfile = p; + break; + } + p = NULL; /* prevent continued processing */ + break; + default: + /* + * Unrecognised option. + */ + { + char opt[2]; + opt[0] = c; + opt[1] = '\0'; + errs = TRUE, error(err_nosuchopt, opt); + } + } + } + } else + { + /* + * A non-option argument. + */ + infiles[nfiles++] = p; + } + } + + if (errs) + exit(EXIT_FAILURE); + if (nogo) + exit(EXIT_SUCCESS); + + /* + * Do the work. + */ + if (nfiles == 0) + { + error(err_noinput); + usage(); + exit(EXIT_FAILURE); + } + + { + input in; + paragraph *sourceform, *p; + indexdata *idx; + keywordlist *keywords; + + in.filenames = infiles; + in.nfiles = nfiles; + in.currfp = NULL; + in.currindex = 0; + in.npushback = in.pushbacksize = 0; + in.pushback = NULL; + in.reportcols = reportcols; + in.stack = NULL; + + idx = make_index(); + + sourceform = read_input(&in, idx); + if (!sourceform) + exit(EXIT_FAILURE); + + sfree(in.pushback); + + mark_attr_ends(sourceform); + + sfree(infiles); + + keywords = get_keywords(sourceform); + if (!keywords) + exit(EXIT_FAILURE); + gen_citations(sourceform, keywords); + subst_keywords(sourceform, keywords); + + for (p = sourceform; p; p = p->next) + if (p->type == para_IM) + index_merge(idx, TRUE, p->keyword, p->words); + + build_index(idx); + + + if (debug) + { + index_debug(idx); + dbg_prtkws(keywords); + dbg_prtsource(sourceform); + } + + xhtml_backend(sourceform, keywords, idx); + + free_para_list(sourceform); + free_keywords(keywords); + cleanup_index(idx); + } + + return 0; +} + +static void dbg_prtsource(paragraph * sourceform) +{ + /* + * Output source form in debugging format. 
+ */ + + paragraph *p; + for (p = sourceform; p; p = p->next) + { + wchar_t *wp; + printf("para %d ", p->type); + if (p->keyword) + { + wp = p->keyword; + while (*wp) + { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + } else + printf("(no keyword)"); + printf(" {\n"); + dbg_prtwordlist(1, p->words); + printf("}\n"); + } +} + +static void dbg_prtkws(keywordlist * kws) +{ + /* + * Output keywords in debugging format. + */ + + int i; + keyword *kw; + + for (i = 0; (kw = index234(kws->keys, i)) != NULL; i++) + { + wchar_t *wp; + printf("keyword "); + wp = kw->key; + while (*wp) + { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + printf(" {\n"); + dbg_prtwordlist(1, kw->text); + printf("}\n"); + } +} + +static void dbg_prtwordlist(int level, word * w) +{ + for (; w; w = w->next) + { + wchar_t *wp; + printf("%*sword %d ", level * 4, "", w->type); + if (w->text) + { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) + { + printf(" alt = {\n"); + dbg_prtwordlist(level + 1, w->alt); + printf("%*s}", level * 4, ""); + } + printf("\n"); + } +} diff --git a/Docs/src/bin/halibut/malloc.c b/Docs/src/bin/halibut/malloc.c index 07c8519..24cb676 100755 --- a/Docs/src/bin/halibut/malloc.c +++ b/Docs/src/bin/halibut/malloc.c @@ -1,163 +1,163 @@ -/*
- * malloc.c: safe wrappers around malloc, realloc, free, strdup
- */
-
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include "halibut.h"
-
/*
 * Optional allocation logging. With LOGALLOC defined, the allocator
 * wrappers below take extra file/line parameters and write a trace of
 * every call to "malloc.log"; without it the logging macros expand
 * to nothing.
 */
#ifdef LOGALLOC
#define LOGPARAMS char *file, int line,
static FILE *logallocfp = NULL;
static int logline = 2;         /* off by 1: `null pointer is' */
/* Empty hook called on every logged operation; presumably a handy
 * breakpoint location - TODO confirm. */
static void loginc(void)
{
}
/* Open the log file on first use; exits with status 10 on failure. */
static void logallocinit(void)
{
    if (!logallocfp) {
        logallocfp = fopen("malloc.log", "w");
        if (!logallocfp) {
            fprintf(stderr, "panic: unable to open malloc.log\n");
            exit(10);
        }
        setvbuf(logallocfp, NULL, _IOLBF, BUFSIZ);
        /* %p needs a void * argument; a bare NULL may be a plain 0 */
        fprintf(logallocfp, "null pointer is %p\n", (void *) NULL);
    }
}
/* printf-style write to the log file. */
static void logprintf(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    vfprintf(logallocfp, fmt, ap);
    va_end(ap);
}

#define LOGPRINT(x) ( logallocinit(), logprintf x )
#define LOGINC do { loginc(); logline++; } while (0)
#else
#define LOGPARAMS
#define LOGPRINT(x)
#define LOGINC ((void)0)
#endif
-
-/*
- * smalloc should guarantee to return a useful pointer - Halibut
- * can do nothing except die when it's out of memory anyway.
- */
-void *(smalloc) (LOGPARAMS int size) {
- void *p;
- LOGINC;
- LOGPRINT(("%s %d malloc(%ld)", file, line, (long) size));
- p = malloc(size);
- if (!p)
- fatal(err_nomemory);
- LOGPRINT((" returns %p\n", p));
- return p;
-}
-
-/*
- * sfree should guaranteeably deal gracefully with freeing NULL
- */
-void (sfree) (LOGPARAMS void *p) {
- if (p)
- {
- LOGINC;
- LOGPRINT(("%s %d free(%p)\n", file, line, p));
- free(p);
- }
-}
-
-/*
- * srealloc should guaranteeably be able to realloc NULL
- */
-void *(srealloc) (LOGPARAMS void *p, int size) {
- void *q;
- if (p)
- {
- LOGINC;
- LOGPRINT(("%s %d realloc(%p,%ld)", file, line, p, (long) size));
- q = realloc(p, size);
- LOGPRINT((" returns %p\n", q));
- } else
- {
- LOGINC;
- LOGPRINT(("%s %d malloc(%ld)", file, line, (long) size));
- q = malloc(size);
- LOGPRINT((" returns %p\n", q));
- }
- if (!q)
- fatal(err_nomemory);
- return q;
-}
-
/*
 * dupstr copies a NUL-terminated string into storage obtained from
 * smalloc, so (like smalloc) it never hands back NULL.
 */
char *dupstr(char *s)
{
    char *copy = smalloc(1 + strlen(s));

    return strcpy(copy, s);
}
-
-/*
- * Duplicate a linked list of words
- */
-word *dup_word_list(word * w)
-{
- word *head, **eptr = &head;
-
- while (w)
- {
- word *newwd = mknew(word);
- *newwd = *w; /* structure copy */
- newwd->text = ustrdup(w->text);
- if (w->alt)
- newwd->alt = dup_word_list(w->alt);
- *eptr = newwd;
- newwd->next = NULL;
- eptr = &newwd->next;
-
- w = w->next;
- }
-
- return head;
-}
-
-/*
- * Free a linked list of words
- */
-void free_word_list(word * w)
-{
- word *t;
- while (w)
- {
- t = w;
- w = w->next;
- sfree(t->text);
- if (t->alt)
- free_word_list(t->alt);
- sfree(t);
- }
-}
-
-/*
- * Free a linked list of paragraphs
- */
-void free_para_list(paragraph * p)
-{
- paragraph *t;
- while (p)
- {
- t = p;
- p = p->next;
- sfree(t->keyword);
- free_word_list(t->words);
- sfree(t);
- }
-}
+/* + * malloc.c: safe wrappers around malloc, realloc, free, strdup + */ + +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include "halibut.h" + +#ifdef LOGALLOC +#define LOGPARAMS char *file, int line, +static FILE *logallocfp = NULL; +static int logline = 2; /* off by 1: `null pointer is' */ +static void loginc(void) +{ +} +static void logallocinit(void) +{ + if (!logallocfp) + { + logallocfp = fopen("malloc.log", "w"); + if (!logallocfp) + { + fprintf(stderr, "panic: unable to open malloc.log\n"); + exit(10); + } + setvbuf(logallocfp, NULL, _IOLBF, BUFSIZ); + fprintf(logallocfp, "null pointer is %p\n", NULL); + } +} +static void logprintf(char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vfprintf(logallocfp, fmt, ap); + va_end(ap); +} + +#define LOGPRINT(x) ( logallocinit(), logprintf x ) +#define LOGINC do { loginc(); logline++; } while (0) +#else +#define LOGPARAMS +#define LOGPRINT(x) +#define LOGINC ((void)0) +#endif + +/* + * smalloc should guarantee to return a useful pointer - Halibut + * can do nothing except die when it's out of memory anyway. 
+ */ +void *(smalloc) (LOGPARAMS int size) { + void *p; + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", file, line, (long) size)); + p = malloc(size); + if (!p) + fatal(err_nomemory); + LOGPRINT((" returns %p\n", p)); + return p; +} + +/* + * sfree should guaranteeably deal gracefully with freeing NULL + */ +void (sfree) (LOGPARAMS void *p) { + if (p) + { + LOGINC; + LOGPRINT(("%s %d free(%p)\n", file, line, p)); + free(p); + } +} + +/* + * srealloc should guaranteeably be able to realloc NULL + */ +void *(srealloc) (LOGPARAMS void *p, int size) { + void *q; + if (p) + { + LOGINC; + LOGPRINT(("%s %d realloc(%p,%ld)", file, line, p, (long) size)); + q = realloc(p, size); + LOGPRINT((" returns %p\n", q)); + } else + { + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", file, line, (long) size)); + q = malloc(size); + LOGPRINT((" returns %p\n", q)); + } + if (!q) + fatal(err_nomemory); + return q; +} + +/* + * dupstr is like strdup, but with the never-return-NULL property + * of smalloc (and also reliably defined in all environments :-) + */ +char *dupstr(char *s) +{ + char *r = smalloc(1 + strlen(s)); + strcpy(r, s); + return r; +} + +/* + * Duplicate a linked list of words + */ +word *dup_word_list(word * w) +{ + word *head, **eptr = &head; + + while (w) + { + word *newwd = mknew(word); + *newwd = *w; /* structure copy */ + newwd->text = ustrdup(w->text); + if (w->alt) + newwd->alt = dup_word_list(w->alt); + *eptr = newwd; + newwd->next = NULL; + eptr = &newwd->next; + + w = w->next; + } + + return head; +} + +/* + * Free a linked list of words + */ +void free_word_list(word * w) +{ + word *t; + while (w) + { + t = w; + w = w->next; + sfree(t->text); + if (t->alt) + free_word_list(t->alt); + sfree(t); + } +} + +/* + * Free a linked list of paragraphs + */ +void free_para_list(paragraph * p) +{ + paragraph *t; + while (p) + { + t = p; + p = p->next; + sfree(t->keyword); + free_word_list(t->words); + sfree(t); + } +} diff --git a/Docs/src/bin/halibut/misc.c 
b/Docs/src/bin/halibut/misc.c index e1818ad..5fad528 100755 --- a/Docs/src/bin/halibut/misc.c +++ b/Docs/src/bin/halibut/misc.c @@ -1,357 +1,357 @@ -/*
- * misc.c: miscellaneous useful items
- */
-#include <string.h>
-#include "halibut.h"
-
/* Storage behind the `stack' handle used by the stk_* routines. */
struct stackTag {
    void **data;                /* array of pushed item pointers */
    int sp;                     /* items currently held; index of the next free slot */
    int size;                   /* number of slots allocated in data */
};
-
-stack stk_new(void)
-{
- stack s;
-
- s = mknew(struct stackTag);
- s->sp = 0;
- s->size = 0;
- s->data = NULL;
-
- return s;
-}
-
-void stk_free(stack s)
-{
- sfree(s->data);
- sfree(s);
-}
-
-void stk_push(stack s, void *item)
-{
- if (s->size <= s->sp)
- {
- s->size = s->sp + 32;
- s->data = resize(s->data, s->size);
- }
- s->data[s->sp++] = item;
-}
-
-void *stk_pop(stack s)
-{
- if (s->sp > 0)
- return s->data[--s->sp];
- else
- return NULL;
-}
-
/*
 * Small routines to amalgamate a string from an input source.
 */
/* Ready-made initial values for the wide and narrow accumulators:
 * both counters zero and no text buffer. */
const rdstring empty_rdstring = { 0, 0, NULL };
const rdstringc empty_rdstringc = { 0, 0, NULL };
-
-void rdadd(rdstring * rs, wchar_t c)
-{
- if (rs->pos >= rs->size - 1)
- {
- rs->size = rs->pos + 128;
- rs->text = resize(rs->text, rs->size);
- }
- rs->text[rs->pos++] = c;
- rs->text[rs->pos] = 0;
-}
-
-void rdadds(rdstring * rs, wchar_t * p)
-{
- int len = ustrlen(p);
- if (rs->pos >= rs->size - len)
- {
- rs->size = rs->pos + len + 128;
- rs->text = resize(rs->text, rs->size);
- }
- ustrcpy(rs->text + rs->pos, p);
- rs->pos += len;
-}
-
-wchar_t *rdtrim(rdstring * rs)
-{
- rs->text = resize(rs->text, rs->pos + 1);
- return rs->text;
-}
-
-void rdaddc(rdstringc * rs, char c)
-{
- if (rs->pos >= rs->size - 1)
- {
- rs->size = rs->pos + 128;
- rs->text = resize(rs->text, rs->size);
- }
- rs->text[rs->pos++] = c;
- rs->text[rs->pos] = 0;
-}
-
-void rdaddsc(rdstringc * rs, char *p)
-{
- int len = strlen(p);
- if (rs->pos >= rs->size - len)
- {
- rs->size = rs->pos + len + 128;
- rs->text = resize(rs->text, rs->size);
- }
- strcpy(rs->text + rs->pos, p);
- rs->pos += len;
-}
-
-char *rdtrimc(rdstringc * rs)
-{
- rs->text = resize(rs->text, rs->pos + 1);
- return rs->text;
-}
-
/*
 * Compare two word lists, returning a negative value, zero or a
 * positive value. Text is compared without regard to case
 * (ustricmp / utolower). When one list is a prefix of the other, the
 * shorter list sorts first.
 */
int compare_wordlists(word * a, word * b)
{
    int t;
    while (a && b)
    {
        /* Words of different types are ordered by type value. */
        if (a->type != b->type)
            return (a->type < b->type ? -1 : +1);       /* FIXME? */
        t = a->type;
        if ((t != word_Normal && t != word_Code &&
             t != word_WeakCode && t != word_Emph) || a->alt || b->alt)
        {
            /*
             * Not a plain text word, or a word carrying an `alt'
             * list: compare this one word's text, then the alt lists.
             */
            int c;
            if (a->text && b->text)
            {
                c = ustricmp(a->text, b->text);
                if (c)
                    return c;
            }
            c = compare_wordlists(a->alt, b->alt);
            if (c)
                return c;
            a = a->next;
            b = b->next;
        } else
        {
            /*
             * Plain text: compare character by character. When one
             * side's word runs out and the next word has the same
             * type and no alt list, carry on into that word, so a
             * run of such words compares like one joined string.
             */
            wchar_t *ap = a->text, *bp = b->text;
            while (*ap && *bp)
            {
                wchar_t ac = utolower(*ap), bc = utolower(*bp);
                if (ac != bc)
                    return (ac < bc ? -1 : +1);
                if (!*++ap && a->next && a->next->type == t && !a->next->alt)
                    a = a->next, ap = a->text;
                if (!*++bp && b->next && b->next->type == t && !b->next->alt)
                    b = b->next, bp = b->text;
            }
            /* Whichever side still has text left sorts later. */
            if (*ap || *bp)
                return (*ap ? +1 : -1);
            a = a->next;
            b = b->next;
        }
    }

    /* One list ended first: the longer one sorts later. */
    if (a || b)
        return (a ? +1 : -1);
    else
        return 0;
}
-
-void mark_attr_ends(paragraph * sourceform)
-{
- paragraph *p;
- word *w, *wp;
- for (p = sourceform; p; p = p->next)
- {
- wp = NULL;
- for (w = p->words; w; w = w->next)
- {
- if (isattr(w->type))
- {
- int before = (wp && isattr(wp->type) &&
- sameattr(wp->type, w->type));
- int after = (w->next && isattr(w->next->type) &&
- sameattr(w->next->type, w->type));
- w->aux |= (before ?
- (after ? attr_Always : attr_Last) :
- (after ? attr_First : attr_Only));
- }
- wp = w;
- }
- }
-}
-
-wrappedline *wrap_para(word * text, int width, int subsequentwidth,
- int (*widthfn) (word *))
-{
- wrappedline *head = NULL, **ptr = &head;
- int nwords, wordsize;
- struct wrapword {
- word *begin, *end;
- int width;
- int spacewidth;
- int cost;
- int nwords;
- } *wrapwords;
- int i, j, n;
-
- /*
- * Break the line up into wrappable components.
- */
- nwords = wordsize = 0;
- wrapwords = NULL;
- while (text)
- {
- if (nwords >= wordsize)
- {
- wordsize = nwords + 64;
- wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords));
- }
- wrapwords[nwords].width = 0;
- wrapwords[nwords].begin = text;
- while (text)
- {
- wrapwords[nwords].width += widthfn(text);
- wrapwords[nwords].end = text->next;
- if (text->next && (text->next->type == word_WhiteSpace ||
- text->next->type == word_EmphSpace ||
- text->breaks))
- break;
- text = text->next;
- }
- if (text && text->next && (text->next->type == word_WhiteSpace ||
- text->next->type == word_EmphSpace))
- {
- wrapwords[nwords].spacewidth = widthfn(text->next);
- text = text->next;
- } else
- {
- wrapwords[nwords].spacewidth = 0;
- }
- nwords++;
- if (text)
- text = text->next;
- }
-
- /*
- * Perform the dynamic wrapping algorithm: work backwards from
- * nwords-1, determining the optimal wrapping for each terminal
- * subsequence of the paragraph.
- */
- for (i = nwords; i--;)
- {
- int best = -1;
- int bestcost = 0;
- int cost;
- int linelen = 0, spacewidth = 0;
- int seenspace;
- int thiswidth = (i == 0 ? width : subsequentwidth);
-
- j = 0;
- seenspace = 0;
- while (i + j < nwords)
- {
- /*
- * See what happens if we put j+1 words on this line.
- */
- if (spacewidth)
- seenspace = 1;
- linelen += spacewidth + wrapwords[i + j].width;
- spacewidth = wrapwords[i + j].spacewidth;
- j++;
- if (linelen > thiswidth)
- {
- /*
- * If we're over the width limit, abandon ship,
- * _unless_ there is no best-effort yet (which will
- * only happen if the first word is too long all by
- * itself).
- */
- if (best > 0)
- break;
- }
- if (i + j == nwords)
- {
- /*
- * Special case: if we're at the very end of the
- * paragraph, we don't score penalty points for the
- * white space left on the line.
- */
- cost = 0;
- } else
- {
- cost = (thiswidth - linelen) * (thiswidth - linelen);
- cost += wrapwords[i + j].cost;
- }
- /*
- * We compare bestcost >= cost, not bestcost > cost,
- * because in cases where the costs are identical we
- * want to try to look like the greedy algorithm,
- * because readers are likely to have spent a lot of
- * time looking at greedy-wrapped paragraphs and
- * there's no point violating the Principle of Least
- * Surprise if it doesn't actually gain anything.
- */
- if (best < 0 || bestcost >= cost)
- {
- bestcost = cost;
- best = j;
- }
- }
- /*
- * Now we know the optimal answer for this terminal
- * subsequence, so put it in wrapwords.
- */
- wrapwords[i].cost = bestcost;
- wrapwords[i].nwords = best;
- }
-
- /*
- * We've wrapped the paragraph. Now build the output
- * `wrappedline' list.
- */
- i = 0;
- while (i < nwords)
- {
- wrappedline *w = mknew(wrappedline);
- *ptr = w;
- ptr = &w->next;
- w->next = NULL;
-
- n = wrapwords[i].nwords;
- w->begin = wrapwords[i].begin;
- w->end = wrapwords[i + n - 1].end;
-
- /*
- * Count along the words to find nspaces and shortfall.
- */
- w->nspaces = 0;
- w->shortfall = width;
- for (j = 0; j < n; j++)
- {
- w->shortfall -= wrapwords[i + j].width;
- if (j < n - 1 && wrapwords[i + j].spacewidth)
- {
- w->nspaces++;
- w->shortfall -= wrapwords[i + j].spacewidth;
- }
- }
- i += n;
- }
-
- sfree(wrapwords);
-
- return head;
-}
-
-void wrap_free(wrappedline * w)
-{
- while (w)
- {
- wrappedline *t = w->next;
- sfree(w);
- w = t;
- }
-}
+/* + * misc.c: miscellaneous useful items + */ +#include <string.h> +#include "halibut.h" + +struct stackTag { + void **data; + int sp; + int size; +}; + +stack stk_new(void) +{ + stack s; + + s = mknew(struct stackTag); + s->sp = 0; + s->size = 0; + s->data = NULL; + + return s; +} + +void stk_free(stack s) +{ + sfree(s->data); + sfree(s); +} + +void stk_push(stack s, void *item) +{ + if (s->size <= s->sp) + { + s->size = s->sp + 32; + s->data = resize(s->data, s->size); + } + s->data[s->sp++] = item; +} + +void *stk_pop(stack s) +{ + if (s->sp > 0) + return s->data[--s->sp]; + else + return NULL; +} + +/* + * Small routines to amalgamate a string from an input source. + */ +const rdstring empty_rdstring = { 0, 0, NULL }; +const rdstringc empty_rdstringc = { 0, 0, NULL }; + +void rdadd(rdstring * rs, wchar_t c) +{ + if (rs->pos >= rs->size - 1) + { + rs->size = rs->pos + 128; + rs->text = resize(rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} + +void rdadds(rdstring * rs, wchar_t * p) +{ + int len = ustrlen(p); + if (rs->pos >= rs->size - len) + { + rs->size = rs->pos + len + 128; + rs->text = resize(rs->text, rs->size); + } + ustrcpy(rs->text + rs->pos, p); + rs->pos += len; +} + +wchar_t *rdtrim(rdstring * rs) +{ + rs->text = resize(rs->text, rs->pos + 1); + return rs->text; +} + +void rdaddc(rdstringc * rs, char c) +{ + if (rs->pos >= rs->size - 1) + { + rs->size = rs->pos + 128; + rs->text = resize(rs->text, rs->size); + } + rs->text[rs->pos++] = c; + rs->text[rs->pos] = 0; +} + +void rdaddsc(rdstringc * rs, char *p) +{ + int len = strlen(p); + if (rs->pos >= rs->size - len) + { + rs->size = rs->pos + len + 128; + rs->text = resize(rs->text, rs->size); + } + strcpy(rs->text + rs->pos, p); + rs->pos += len; +} + +char *rdtrimc(rdstringc * rs) +{ + rs->text = resize(rs->text, rs->pos + 1); + return rs->text; +} + +int compare_wordlists(word * a, word * b) +{ + int t; + while (a && b) + { + if (a->type != b->type) + return 
(a->type < b->type ? -1 : +1); /* FIXME? */ + t = a->type; + if ((t != word_Normal && t != word_Code && + t != word_WeakCode && t != word_Emph) || a->alt || b->alt) + { + int c; + if (a->text && b->text) + { + c = ustricmp(a->text, b->text); + if (c) + return c; + } + c = compare_wordlists(a->alt, b->alt); + if (c) + return c; + a = a->next; + b = b->next; + } else + { + wchar_t *ap = a->text, *bp = b->text; + while (*ap && *bp) + { + wchar_t ac = utolower(*ap), bc = utolower(*bp); + if (ac != bc) + return (ac < bc ? -1 : +1); + if (!*++ap && a->next && a->next->type == t && !a->next->alt) + a = a->next, ap = a->text; + if (!*++bp && b->next && b->next->type == t && !b->next->alt) + b = b->next, bp = b->text; + } + if (*ap || *bp) + return (*ap ? +1 : -1); + a = a->next; + b = b->next; + } + } + + if (a || b) + return (a ? +1 : -1); + else + return 0; +} + +void mark_attr_ends(paragraph * sourceform) +{ + paragraph *p; + word *w, *wp; + for (p = sourceform; p; p = p->next) + { + wp = NULL; + for (w = p->words; w; w = w->next) + { + if (isattr(w->type)) + { + int before = (wp && isattr(wp->type) && + sameattr(wp->type, w->type)); + int after = (w->next && isattr(w->next->type) && + sameattr(w->next->type, w->type)); + w->aux |= (before ? + (after ? attr_Always : attr_Last) : + (after ? attr_First : attr_Only)); + } + wp = w; + } + } +} + +wrappedline *wrap_para(word * text, int width, int subsequentwidth, + int (*widthfn) (word *)) +{ + wrappedline *head = NULL, **ptr = &head; + int nwords, wordsize; + struct wrapword { + word *begin, *end; + int width; + int spacewidth; + int cost; + int nwords; + } *wrapwords; + int i, j, n; + + /* + * Break the line up into wrappable components. 
+ */ + nwords = wordsize = 0; + wrapwords = NULL; + while (text) + { + if (nwords >= wordsize) + { + wordsize = nwords + 64; + wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords)); + } + wrapwords[nwords].width = 0; + wrapwords[nwords].begin = text; + while (text) + { + wrapwords[nwords].width += widthfn(text); + wrapwords[nwords].end = text->next; + if (text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace || + text->breaks)) + break; + text = text->next; + } + if (text && text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace)) + { + wrapwords[nwords].spacewidth = widthfn(text->next); + text = text->next; + } else + { + wrapwords[nwords].spacewidth = 0; + } + nwords++; + if (text) + text = text->next; + } + + /* + * Perform the dynamic wrapping algorithm: work backwards from + * nwords-1, determining the optimal wrapping for each terminal + * subsequence of the paragraph. + */ + for (i = nwords; i--;) + { + int best = -1; + int bestcost = 0; + int cost; + int linelen = 0, spacewidth = 0; + int seenspace; + int thiswidth = (i == 0 ? width : subsequentwidth); + + j = 0; + seenspace = 0; + while (i + j < nwords) + { + /* + * See what happens if we put j+1 words on this line. + */ + if (spacewidth) + seenspace = 1; + linelen += spacewidth + wrapwords[i + j].width; + spacewidth = wrapwords[i + j].spacewidth; + j++; + if (linelen > thiswidth) + { + /* + * If we're over the width limit, abandon ship, + * _unless_ there is no best-effort yet (which will + * only happen if the first word is too long all by + * itself). + */ + if (best > 0) + break; + } + if (i + j == nwords) + { + /* + * Special case: if we're at the very end of the + * paragraph, we don't score penalty points for the + * white space left on the line. 
+ */ + cost = 0; + } else + { + cost = (thiswidth - linelen) * (thiswidth - linelen); + cost += wrapwords[i + j].cost; + } + /* + * We compare bestcost >= cost, not bestcost > cost, + * because in cases where the costs are identical we + * want to try to look like the greedy algorithm, + * because readers are likely to have spent a lot of + * time looking at greedy-wrapped paragraphs and + * there's no point violating the Principle of Least + * Surprise if it doesn't actually gain anything. + */ + if (best < 0 || bestcost >= cost) + { + bestcost = cost; + best = j; + } + } + /* + * Now we know the optimal answer for this terminal + * subsequence, so put it in wrapwords. + */ + wrapwords[i].cost = bestcost; + wrapwords[i].nwords = best; + } + + /* + * We've wrapped the paragraph. Now build the output + * `wrappedline' list. + */ + i = 0; + while (i < nwords) + { + wrappedline *w = mknew(wrappedline); + *ptr = w; + ptr = &w->next; + w->next = NULL; + + n = wrapwords[i].nwords; + w->begin = wrapwords[i].begin; + w->end = wrapwords[i + n - 1].end; + + /* + * Count along the words to find nspaces and shortfall. + */ + w->nspaces = 0; + w->shortfall = width; + for (j = 0; j < n; j++) + { + w->shortfall -= wrapwords[i + j].width; + if (j < n - 1 && wrapwords[i + j].spacewidth) + { + w->nspaces++; + w->shortfall -= wrapwords[i + j].spacewidth; + } + } + i += n; + } + + sfree(wrapwords); + + return head; +} + +void wrap_free(wrappedline * w) +{ + while (w) + { + wrappedline *t = w->next; + sfree(w); + w = t; + } +} diff --git a/Docs/src/bin/halibut/style.c b/Docs/src/bin/halibut/style.c index 2e92b41..8a3f3e6 100755 --- a/Docs/src/bin/halibut/style.c +++ b/Docs/src/bin/halibut/style.c @@ -1,7 +1,7 @@ -/*
- * style.c: load and keep track of user style preferences
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "halibut.h"
+/* + * style.c: load and keep track of user style preferences + */ + +#include <stdio.h> +#include <stdlib.h> +#include "halibut.h" diff --git a/Docs/src/bin/halibut/tree234.c b/Docs/src/bin/halibut/tree234.c index 7a72a3f..71ce298 100755 --- a/Docs/src/bin/halibut/tree234.c +++ b/Docs/src/bin/halibut/tree234.c @@ -1,2460 +1,2460 @@ -/*
- * tree234.c: reasonably generic counted 2-3-4 tree routines.
- *
- * This file is copyright 1999-2001 Simon Tatham.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#include "tree234.h"
-
-#define smalloc malloc
-#define sfree free
-
-#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) )
-
-#ifdef TEST
-#define LOG(x) (printf x)
-#else
-#define LOG(x)
-#endif
-
-typedef struct node234_Tag node234;
-
-struct tree234_Tag {
- node234 *root;
- cmpfn234 cmp;
-};
-
-struct node234_Tag {
- node234 *parent;
- node234 *kids[4];
- int counts[4];
- void *elems[3];
-};
-
-/*
- * Create a 2-3-4 tree.
- */
-tree234 *newtree234(cmpfn234 cmp)
-{
- tree234 *ret = mknew(tree234);
- LOG(("created tree %p\n", ret));
- ret->root = NULL;
- ret->cmp = cmp;
- return ret;
-}
-
-/*
- * Free a 2-3-4 tree (not including freeing the elements).
- */
-static void freenode234(node234 * n)
-{
- if (!n)
- return;
- freenode234(n->kids[0]);
- freenode234(n->kids[1]);
- freenode234(n->kids[2]);
- freenode234(n->kids[3]);
- sfree(n);
-}
-
-void freetree234(tree234 * t)
-{
- freenode234(t->root);
- sfree(t);
-}
-
-/*
- * Internal function to count a node.
- */
-static int countnode234(node234 * n)
-{
- int count = 0;
- int i;
- if (!n)
- return 0;
- for (i = 0; i < 4; i++)
- count += n->counts[i];
- for (i = 0; i < 3; i++)
- if (n->elems[i])
- count++;
- return count;
-}
-
-/*
- * Count the elements in a tree.
- */
-int count234(tree234 * t)
-{
- if (t->root)
- return countnode234(t->root);
- else
- return 0;
-}
-
-/*
- * Propagate a node overflow up a tree until it stops. Returns 0 or
- * 1, depending on whether the root had to be split or not.
- */
-static int
-add234_insert(node234 * left, void *e, node234 * right,
- node234 ** root, node234 * n, int ki)
-{
- int lcount, rcount;
- /*
- * We need to insert the new left/element/right set in n at
- * child position ki.
- */
- lcount = countnode234(left);
- rcount = countnode234(right);
- while (n)
- {
- LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n", left,
- lcount, e, right, rcount, ki));
- if (n->elems[1] == NULL)
- {
- /*
- * Insert in a 2-node; simple.
- */
- if (ki == 0)
- {
- LOG((" inserting on left of 2-node\n"));
- n->kids[2] = n->kids[1];
- n->counts[2] = n->counts[1];
- n->elems[1] = n->elems[0];
- n->kids[1] = right;
- n->counts[1] = rcount;
- n->elems[0] = e;
- n->kids[0] = left;
- n->counts[0] = lcount;
- } else
- { /* ki == 1 */
- LOG((" inserting on right of 2-node\n"));
- n->kids[2] = right;
- n->counts[2] = rcount;
- n->elems[1] = e;
- n->kids[1] = left;
- n->counts[1] = lcount;
- }
- if (n->kids[0])
- n->kids[0]->parent = n;
- if (n->kids[1])
- n->kids[1]->parent = n;
- if (n->kids[2])
- n->kids[2]->parent = n;
- LOG((" done\n"));
- break;
- } else if (n->elems[2] == NULL)
- {
- /*
- * Insert in a 3-node; simple.
- */
- if (ki == 0)
- {
- LOG((" inserting on left of 3-node\n"));
- n->kids[3] = n->kids[2];
- n->counts[3] = n->counts[2];
- n->elems[2] = n->elems[1];
- n->kids[2] = n->kids[1];
- n->counts[2] = n->counts[1];
- n->elems[1] = n->elems[0];
- n->kids[1] = right;
- n->counts[1] = rcount;
- n->elems[0] = e;
- n->kids[0] = left;
- n->counts[0] = lcount;
- } else if (ki == 1)
- {
- LOG((" inserting in middle of 3-node\n"));
- n->kids[3] = n->kids[2];
- n->counts[3] = n->counts[2];
- n->elems[2] = n->elems[1];
- n->kids[2] = right;
- n->counts[2] = rcount;
- n->elems[1] = e;
- n->kids[1] = left;
- n->counts[1] = lcount;
- } else
- { /* ki == 2 */
- LOG((" inserting on right of 3-node\n"));
- n->kids[3] = right;
- n->counts[3] = rcount;
- n->elems[2] = e;
- n->kids[2] = left;
- n->counts[2] = lcount;
- }
- if (n->kids[0])
- n->kids[0]->parent = n;
- if (n->kids[1])
- n->kids[1]->parent = n;
- if (n->kids[2])
- n->kids[2]->parent = n;
- if (n->kids[3])
- n->kids[3]->parent = n;
- LOG((" done\n"));
- break;
- } else
- {
- node234 *m = mknew(node234);
- m->parent = n->parent;
- LOG((" splitting a 4-node; created new node %p\n", m));
- /*
- * Insert in a 4-node; split into a 2-node and a
- * 3-node, and move focus up a level.
- *
- * I don't think it matters which way round we put the
- * 2 and the 3. For simplicity, we'll put the 3 first
- * always.
- */
- if (ki == 0)
- {
- m->kids[0] = left;
- m->counts[0] = lcount;
- m->elems[0] = e;
- m->kids[1] = right;
- m->counts[1] = rcount;
- m->elems[1] = n->elems[0];
- m->kids[2] = n->kids[1];
- m->counts[2] = n->counts[1];
- e = n->elems[1];
- n->kids[0] = n->kids[2];
- n->counts[0] = n->counts[2];
- n->elems[0] = n->elems[2];
- n->kids[1] = n->kids[3];
- n->counts[1] = n->counts[3];
- } else if (ki == 1)
- {
- m->kids[0] = n->kids[0];
- m->counts[0] = n->counts[0];
- m->elems[0] = n->elems[0];
- m->kids[1] = left;
- m->counts[1] = lcount;
- m->elems[1] = e;
- m->kids[2] = right;
- m->counts[2] = rcount;
- e = n->elems[1];
- n->kids[0] = n->kids[2];
- n->counts[0] = n->counts[2];
- n->elems[0] = n->elems[2];
- n->kids[1] = n->kids[3];
- n->counts[1] = n->counts[3];
- } else if (ki == 2)
- {
- m->kids[0] = n->kids[0];
- m->counts[0] = n->counts[0];
- m->elems[0] = n->elems[0];
- m->kids[1] = n->kids[1];
- m->counts[1] = n->counts[1];
- m->elems[1] = n->elems[1];
- m->kids[2] = left;
- m->counts[2] = lcount;
- /* e = e; */
- n->kids[0] = right;
- n->counts[0] = rcount;
- n->elems[0] = n->elems[2];
- n->kids[1] = n->kids[3];
- n->counts[1] = n->counts[3];
- } else
- { /* ki == 3 */
- m->kids[0] = n->kids[0];
- m->counts[0] = n->counts[0];
- m->elems[0] = n->elems[0];
- m->kids[1] = n->kids[1];
- m->counts[1] = n->counts[1];
- m->elems[1] = n->elems[1];
- m->kids[2] = n->kids[2];
- m->counts[2] = n->counts[2];
- n->kids[0] = left;
- n->counts[0] = lcount;
- n->elems[0] = e;
- n->kids[1] = right;
- n->counts[1] = rcount;
- e = n->elems[2];
- }
- m->kids[3] = n->kids[3] = n->kids[2] = NULL;
- m->counts[3] = n->counts[3] = n->counts[2] = 0;
- m->elems[2] = n->elems[2] = n->elems[1] = NULL;
- if (m->kids[0])
- m->kids[0]->parent = m;
- if (m->kids[1])
- m->kids[1]->parent = m;
- if (m->kids[2])
- m->kids[2]->parent = m;
- if (n->kids[0])
- n->kids[0]->parent = n;
- if (n->kids[1])
- n->kids[1]->parent = n;
- LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m,
- m->kids[0], m->counts[0], m->elems[0],
- m->kids[1], m->counts[1], m->elems[1],
- m->kids[2], m->counts[2]));
- LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1]));
- left = m;
- lcount = countnode234(left);
- right = n;
- rcount = countnode234(right);
- }
- if (n->parent)
- ki = (n->parent->kids[0] == n ? 0 :
- n->parent->kids[1] == n ? 1 : n->parent->kids[2] == n ? 2 : 3);
- n = n->parent;
- }
-
- /*
- * If we've come out of here by `break', n will still be
- * non-NULL and all we need to do is go back up the tree
- * updating counts. If we've come here because n is NULL, we
- * need to create a new root for the tree because the old one
- * has just split into two. */
- if (n)
- {
- while (n->parent)
- {
- int count = countnode234(n);
- int childnum;
- childnum = (n->parent->kids[0] == n ? 0 :
- n->parent->kids[1] == n ? 1 :
- n->parent->kids[2] == n ? 2 : 3);
- n->parent->counts[childnum] = count;
- n = n->parent;
- }
- return 0; /* root unchanged */
- } else
- {
- LOG((" root is overloaded, split into two\n"));
- (*root) = mknew(node234);
- (*root)->kids[0] = left;
- (*root)->counts[0] = lcount;
- (*root)->elems[0] = e;
- (*root)->kids[1] = right;
- (*root)->counts[1] = rcount;
- (*root)->elems[1] = NULL;
- (*root)->kids[2] = NULL;
- (*root)->counts[2] = 0;
- (*root)->elems[2] = NULL;
- (*root)->kids[3] = NULL;
- (*root)->counts[3] = 0;
- (*root)->parent = NULL;
- if ((*root)->kids[0])
- (*root)->kids[0]->parent = (*root);
- if ((*root)->kids[1])
- (*root)->kids[1]->parent = (*root);
- LOG((" new root is %p/%d \"%s\" %p/%d\n",
- (*root)->kids[0], (*root)->counts[0],
- (*root)->elems[0], (*root)->kids[1], (*root)->counts[1]));
- return 1; /* root moved */
- }
-}
-
-/*
- * Add an element e to a 2-3-4 tree t. Returns e on success, or if
- * an existing element compares equal, returns that.
- */
-static void *add234_internal(tree234 * t, void *e, int index)
-{
- node234 *n;
- int ki;
- void *orig_e = e;
- int c;
-
- LOG(("adding element \"%s\" to tree %p\n", e, t));
- if (t->root == NULL)
- {
- t->root = mknew(node234);
- t->root->elems[1] = t->root->elems[2] = NULL;
- t->root->kids[0] = t->root->kids[1] = NULL;
- t->root->kids[2] = t->root->kids[3] = NULL;
- t->root->counts[0] = t->root->counts[1] = 0;
- t->root->counts[2] = t->root->counts[3] = 0;
- t->root->parent = NULL;
- t->root->elems[0] = e;
- LOG((" created root %p\n", t->root));
- return orig_e;
- }
-
- n = t->root;
- while (n)
- {
- LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- if (index >= 0)
- {
- if (!n->kids[0])
- {
- /*
- * Leaf node. We want to insert at kid position
- * equal to the index:
- *
- * 0 A 1 B 2 C 3
- */
- ki = index;
- } else
- {
- /*
- * Internal node. We always descend through it (add
- * always starts at the bottom, never in the
- * middle).
- */
- if (index <= n->counts[0])
- {
- ki = 0;
- } else if (index -= n->counts[0] + 1, index <= n->counts[1])
- {
- ki = 1;
- } else if (index -= n->counts[1] + 1, index <= n->counts[2])
- {
- ki = 2;
- } else if (index -= n->counts[2] + 1, index <= n->counts[3])
- {
- ki = 3;
- } else
- return NULL; /* error: index out of range */
- }
- } else
- {
- if ((c = t->cmp(e, n->elems[0])) < 0)
- ki = 0;
- else if (c == 0)
- return n->elems[0]; /* already exists */
- else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0)
- ki = 1;
- else if (c == 0)
- return n->elems[1]; /* already exists */
- else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0)
- ki = 2;
- else if (c == 0)
- return n->elems[2]; /* already exists */
- else
- ki = 3;
- }
- LOG((" moving to child %d (%p)\n", ki, n->kids[ki]));
- if (!n->kids[ki])
- break;
- n = n->kids[ki];
- }
-
- add234_insert(NULL, e, NULL, &t->root, n, ki);
-
- return orig_e;
-}
-
-void *add234(tree234 * t, void *e)
-{
- if (!t->cmp) /* tree is unsorted */
- return NULL;
-
- return add234_internal(t, e, -1);
-}
-
-void *addpos234(tree234 * t, void *e, int index)
-{
- if (index < 0 || /* index out of range */
- t->cmp) /* tree is sorted */
- return NULL; /* return failure */
-
- return add234_internal(t, e, index); /* this checks the upper bound */
-}
-
-/*
- * Look up the element at a given numeric index in a 2-3-4 tree.
- * Returns NULL if the index is out of range.
- */
-void *index234(tree234 * t, int index)
-{
- node234 *n;
-
- if (!t->root)
- return NULL; /* tree is empty */
-
- if (index < 0 || index >= countnode234(t->root))
- return NULL; /* out of range */
-
- n = t->root;
-
- while (n)
- {
- if (index < n->counts[0])
- n = n->kids[0];
- else if (index -= n->counts[0] + 1, index < 0)
- return n->elems[0];
- else if (index < n->counts[1])
- n = n->kids[1];
- else if (index -= n->counts[1] + 1, index < 0)
- return n->elems[1];
- else if (index < n->counts[2])
- n = n->kids[2];
- else if (index -= n->counts[2] + 1, index < 0)
- return n->elems[2];
- else
- n = n->kids[3];
- }
-
- /* We shouldn't ever get here. I wonder how we did. */
- return NULL;
-}
-
-/*
- * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
- * found. e is always passed as the first argument to cmp, so cmp
- * can be an asymmetric function if desired. cmp can also be passed
- * as NULL, in which case the compare function from the tree proper
- * will be used.
- */
-void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation,
- int *index)
-{
- node234 *n;
- void *ret;
- int c;
- int idx, ecount, kcount, cmpret;
-
- if (t->root == NULL)
- return NULL;
-
- if (cmp == NULL)
- cmp = t->cmp;
-
- n = t->root;
- /*
- * Attempt to find the element itself.
- */
- idx = 0;
- ecount = -1;
- /*
- * Prepare a fake `cmp' result if e is NULL.
- */
- cmpret = 0;
- if (e == NULL)
- {
- assert(relation == REL234_LT || relation == REL234_GT);
- if (relation == REL234_LT)
- cmpret = +1; /* e is a max: always greater */
- else if (relation == REL234_GT)
- cmpret = -1; /* e is a min: always smaller */
- }
- while (1)
- {
- for (kcount = 0; kcount < 4; kcount++)
- {
- if (kcount >= 3 || n->elems[kcount] == NULL ||
- (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0)
- {
- break;
- }
- if (n->kids[kcount])
- idx += n->counts[kcount];
- if (c == 0)
- {
- ecount = kcount;
- break;
- }
- idx++;
- }
- if (ecount >= 0)
- break;
- if (n->kids[kcount])
- n = n->kids[kcount];
- else
- break;
- }
-
- if (ecount >= 0)
- {
- /*
- * We have found the element we're looking for. It's
- * n->elems[ecount], at tree index idx. If our search
- * relation is EQ, LE or GE we can now go home.
- */
- if (relation != REL234_LT && relation != REL234_GT)
- {
- if (index)
- *index = idx;
- return n->elems[ecount];
- }
-
- /*
- * Otherwise, we'll do an indexed lookup for the previous
- * or next element. (It would be perfectly possible to
- * implement these search types in a non-counted tree by
- * going back up from where we are, but far more fiddly.)
- */
- if (relation == REL234_LT)
- idx--;
- else
- idx++;
- } else
- {
- /*
- * We've found our way to the bottom of the tree and we
- * know where we would insert this node if we wanted to:
- * we'd put it in in place of the (empty) subtree
- * n->kids[kcount], and it would have index idx
- *
- * But the actual element isn't there. So if our search
- * relation is EQ, we're doomed.
- */
- if (relation == REL234_EQ)
- return NULL;
-
- /*
- * Otherwise, we must do an index lookup for index idx-1
- * (if we're going left - LE or LT) or index idx (if we're
- * going right - GE or GT).
- */
- if (relation == REL234_LT || relation == REL234_LE)
- {
- idx--;
- }
- }
-
- /*
- * We know the index of the element we want; just call index234
- * to do the rest. This will return NULL if the index is out of
- * bounds, which is exactly what we want.
- */
- ret = index234(t, idx);
- if (ret && index)
- *index = idx;
- return ret;
-}
-
-void *find234(tree234 * t, void *e, cmpfn234 cmp)
-{
- return findrelpos234(t, e, cmp, REL234_EQ, NULL);
-}
-
-void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
-{
- return findrelpos234(t, e, cmp, relation, NULL);
-}
-
-void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
-{
- return findrelpos234(t, e, cmp, REL234_EQ, index);
-}
-
-/*
- * Tree transformation used in delete and split: move a subtree
- * right, from child ki of a node to the next child. Update k and
- * index so that they still point to the same place in the
- * transformed tree. Assumes the destination child is not full, and
- * that the source child does have a subtree to spare. Can cope if
- * the destination child is undersized.
- *
- * . C . . B .
- * / \ -> / \
- * [more] a A b B c d D e [more] a A b c C d D e
- *
- * . C . . B .
- * / \ -> / \
- * [more] a A b B c d [more] a A b c C d
- */
-static void trans234_subtree_right(node234 * n, int ki, int *k, int *index)
-{
- node234 *src, *dest;
- int i, srclen, adjust;
-
- src = n->kids[ki];
- dest = n->kids[ki + 1];
-
- LOG((" trans234_subtree_right(%p, %d):\n", n, ki));
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- src,
- src->kids[0], src->counts[0], src->elems[0],
- src->kids[1], src->counts[1], src->elems[1],
- src->kids[2], src->counts[2], src->elems[2],
- src->kids[3], src->counts[3]));
- LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- dest,
- dest->kids[0], dest->counts[0], dest->elems[0],
- dest->kids[1], dest->counts[1], dest->elems[1],
- dest->kids[2], dest->counts[2], dest->elems[2],
- dest->kids[3], dest->counts[3]));
- /*
- * Move over the rest of the destination node to make space.
- */
- dest->kids[3] = dest->kids[2];
- dest->counts[3] = dest->counts[2];
- dest->elems[2] = dest->elems[1];
- dest->kids[2] = dest->kids[1];
- dest->counts[2] = dest->counts[1];
- dest->elems[1] = dest->elems[0];
- dest->kids[1] = dest->kids[0];
- dest->counts[1] = dest->counts[0];
-
- /* which element to move over */
- i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0);
-
- dest->elems[0] = n->elems[ki];
- n->elems[ki] = src->elems[i];
- src->elems[i] = NULL;
-
- dest->kids[0] = src->kids[i + 1];
- dest->counts[0] = src->counts[i + 1];
- src->kids[i + 1] = NULL;
- src->counts[i + 1] = 0;
-
- if (dest->kids[0])
- dest->kids[0]->parent = dest;
-
- adjust = dest->counts[0] + 1;
-
- n->counts[ki] -= adjust;
- n->counts[ki + 1] += adjust;
-
- srclen = n->counts[ki];
-
- if (k)
- {
- LOG((" before: k,index = %d,%d\n", (*k), (*index)));
- if ((*k) == ki && (*index) > srclen)
- {
- (*index) -= srclen + 1;
- (*k)++;
- } else if ((*k) == ki + 1)
- {
- (*index) += adjust;
- }
- LOG((" after: k,index = %d,%d\n", (*k), (*index)));
- }
-
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- src,
- src->kids[0], src->counts[0], src->elems[0],
- src->kids[1], src->counts[1], src->elems[1],
- src->kids[2], src->counts[2], src->elems[2],
- src->kids[3], src->counts[3]));
- LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- dest,
- dest->kids[0], dest->counts[0], dest->elems[0],
- dest->kids[1], dest->counts[1], dest->elems[1],
- dest->kids[2], dest->counts[2], dest->elems[2],
- dest->kids[3], dest->counts[3]));
-}
-
-/*
- * Tree transformation used in delete and split: move a subtree
- * left, from child ki of a node to the previous child. Update k
- * and index so that they still point to the same place in the
- * transformed tree. Assumes the destination child is not full, and
- * that the source child does have a subtree to spare. Can cope if
- * the destination child is undersized.
- *
- *      . B .                             . C .
- *     /     \                ->         /     \
- * a A b   c C d D e [more]      a A b B c   d D e [more]
- *
- *      . A .                             . B .
- *     /     \                ->         /     \
- *   a   b B c C d [more]            a A b   c C d [more]
- *
- * n is the parent node and ki selects the source child; kids[ki - 1]
- * is read, so ki must be at least 1. k and index are only touched
- * when k is non-NULL, so callers that do not track a position may
- * pass NULL for both.
- */
-static void trans234_subtree_left(node234 * n, int ki, int *k, int *index)
-{
- node234 *src, *dest;
- int i, adjust;
-
- src = n->kids[ki];
- dest = n->kids[ki - 1];
-
- LOG((" trans234_subtree_left(%p, %d):\n", n, ki));
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- dest,
- dest->kids[0], dest->counts[0], dest->elems[0],
- dest->kids[1], dest->counts[1], dest->elems[1],
- dest->kids[2], dest->counts[2], dest->elems[2],
- dest->kids[3], dest->counts[3]));
- LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- src,
- src->kids[0], src->counts[0], src->elems[0],
- src->kids[1], src->counts[1], src->elems[1],
- src->kids[2], src->counts[2], src->elems[2],
- src->kids[3], src->counts[3]));
-
- /* where in dest to put it */
- i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0);
- dest->elems[i] = n->elems[ki - 1]; /* separator moves down from the parent */
- n->elems[ki - 1] = src->elems[0]; /* src's first element becomes the new separator */
-
- dest->kids[i + 1] = src->kids[0];
- dest->counts[i + 1] = src->counts[0];
-
- if (dest->kids[i + 1])
- dest->kids[i + 1]->parent = dest;
-
- /*
- * Move over the rest of the source node.
- */
- src->kids[0] = src->kids[1];
- src->counts[0] = src->counts[1];
- src->elems[0] = src->elems[1];
- src->kids[1] = src->kids[2];
- src->counts[1] = src->counts[2];
- src->elems[1] = src->elems[2];
- src->kids[2] = src->kids[3];
- src->counts[2] = src->counts[3];
- src->elems[2] = NULL;
- src->kids[3] = NULL;
- src->counts[3] = 0;
-
- adjust = dest->counts[i + 1] + 1; /* size of the moved subtree plus one element */
-
- n->counts[ki] -= adjust;
- n->counts[ki - 1] += adjust;
-
- if (k)
- {
- LOG((" before: k,index = %d,%d\n", (*k), (*index)));
- if ((*k) == ki)
- {
- (*index) -= adjust;
- if ((*index) < 0)
- { /* the tracked position moved into the destination child */
- (*index) += n->counts[ki - 1] + 1;
- (*k)--;
- }
- }
- LOG((" after: k,index = %d,%d\n", (*k), (*index)));
- }
-
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- dest,
- dest->kids[0], dest->counts[0], dest->elems[0],
- dest->kids[1], dest->counts[1], dest->elems[1],
- dest->kids[2], dest->counts[2], dest->elems[2],
- dest->kids[3], dest->counts[3]));
- LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- src,
- src->kids[0], src->counts[0], src->elems[0],
- src->kids[1], src->counts[1], src->elems[1],
- src->kids[2], src->counts[2], src->elems[2],
- src->kids[3], src->counts[3]));
-}
-
-/*
- * Tree transformation used in delete and split: merge child nodes
- * ki and ki+1 of a node. Update k and index so that they still
- * point to the same place in the transformed tree. Assumes both
- * children _are_ sufficiently small.
- *
- *      . B .                .
- *     /     \     ->        |
- *  a A b   c C d      a A b B c C d
- *
- * This routine can also cope with either child being undersized:
- *
- *     . A .                 .
- *    /     \      ->        |
- *   a     b B c         a A b B c
- *
- *    . A .                  .
- *   /     \       ->        |
- *  a   b B c C d      a A b B c C d
- *
- * The right-hand child is freed; the merged node is the former
- * left-hand child, still at n->kids[ki]. k and index are only
- * touched when k is non-NULL.
- */
-static void trans234_subtree_merge(node234 * n, int ki, int *k, int *index)
-{
- node234 *left, *right;
- int i, leftlen, rightlen, lsize, rsize;
-
- left = n->kids[ki];
- leftlen = n->counts[ki];
- right = n->kids[ki + 1];
- rightlen = n->counts[ki + 1];
-
- LOG((" trans234_subtree_merge(%p, %d):\n", n, ki));
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- left,
- left->kids[0], left->counts[0], left->elems[0],
- left->kids[1], left->counts[1], left->elems[1],
- left->kids[2], left->counts[2], left->elems[2],
- left->kids[3], left->counts[3]));
- LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- right,
- right->kids[0], right->counts[0], right->elems[0],
- right->kids[1], right->counts[1], right->elems[1],
- right->kids[2], right->counts[2], right->elems[2],
- right->kids[3], right->counts[3]));
-
- assert(!left->elems[2] && !right->elems[2]); /* neither is large! */
- lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0);
- rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0);
-
- left->elems[lsize] = n->elems[ki]; /* separator from the parent joins the left node */
-
- for (i = 0; i < rsize + 1; i++)
- { /* append right's rsize+1 kids and rsize elements after it */
- left->kids[lsize + 1 + i] = right->kids[i];
- left->counts[lsize + 1 + i] = right->counts[i];
- if (left->kids[lsize + 1 + i])
- left->kids[lsize + 1 + i]->parent = left;
- if (i < rsize)
- left->elems[lsize + 1 + i] = right->elems[i];
- }
-
- n->counts[ki] += rightlen + 1; /* right subtree plus the separator */
-
- sfree(right); /* everything it held has been moved into left */
-
- /*
- * Move the rest of n up by one.
- */
- for (i = ki + 1; i < 3; i++)
- {
- n->kids[i] = n->kids[i + 1];
- n->counts[i] = n->counts[i + 1];
- }
- for (i = ki; i < 2; i++)
- {
- n->elems[i] = n->elems[i + 1];
- }
- n->kids[3] = NULL;
- n->counts[3] = 0;
- n->elems[2] = NULL;
-
- if (k)
- {
- LOG((" before: k,index = %d,%d\n", (*k), (*index)));
- if ((*k) == ki + 1)
- { /* position was in the right child: now offset past left and the separator */
- (*k)--;
- (*index) += leftlen + 1;
- } else if ((*k) > ki + 1)
- { /* later children just shifted down one slot */
- (*k)--;
- }
- LOG((" after: k,index = %d,%d\n", (*k), (*index)));
- }
-
- LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n,
- n->kids[0], n->counts[0], n->elems[0],
- n->kids[1], n->counts[1], n->elems[1],
- n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3]));
- LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- left,
- left->kids[0], left->counts[0], left->elems[0],
- left->kids[1], left->counts[1], left->elems[1],
- left->kids[2], left->counts[2], left->elems[2],
- left->kids[3], left->counts[3]));
-
-}
-
-/*
- * Delete an element e in a 2-3-4 tree. Does not free the element,
- * merely removes all links to it from the tree nodes.
- *
- * The element is identified by its position `index'; the pointer
- * that was stored at that position is returned. No range check is
- * done here and the root is assumed non-NULL, so callers must
- * validate the index first. The walk goes from the root to a leaf,
- * enlarging each one-element child on the way (by borrowing from a
- * sibling or by merging) before stepping into it.
- */
-static void *delpos234_internal(tree234 * t, int index)
-{
- node234 *n;
- void *retval;
- int ki, i;
-
- retval = NULL;
-
- n = t->root; /* by assumption this is non-NULL */
- LOG(("deleting item %d from tree %p\n", index, t));
- while (1)
- {
- node234 *sub;
-
- LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index));
- if (index <= n->counts[0])
- {
- ki = 0;
- } else if (index -= n->counts[0] + 1, index <= n->counts[1])
- { /* each failed test leaves index relative to the next child */
- ki = 1;
- } else if (index -= n->counts[1] + 1, index <= n->counts[2])
- {
- ki = 2;
- } else if (index -= n->counts[2] + 1, index <= n->counts[3])
- {
- ki = 3;
- } else
- {
- assert(0); /* can't happen */
- }
-
- if (!n->kids[0])
- break; /* n is a leaf node; we're here! */
-
- /*
- * Check to see if we've found our target element. If so,
- * we must choose a new target (we'll use the old target's
- * successor, which will be in a leaf), move it into the
- * place of the old one, continue down to the leaf and
- * delete the old copy of the new target.
- */
- if (index == n->counts[ki])
- {
- node234 *m;
- LOG((" found element in internal node, index %d\n", ki));
- assert(n->elems[ki]); /* must be a kid _before_ an element */
- ki++;
- index = 0; /* the successor is the first element of subtree ki */
- for (m = n->kids[ki]; m->kids[0]; m = m->kids[0])
- continue;
- LOG((" replacing with element \"%s\" from leaf node %p\n",
- m->elems[0], m));
- retval = n->elems[ki - 1];
- n->elems[ki - 1] = m->elems[0];
- }
-
- /*
- * Recurse down to subtree ki. If it has only one element,
- * we have to do some transformation to start with.
- */
- LOG((" moving to subtree %d\n", ki));
- sub = n->kids[ki];
- if (!sub->elems[1])
- {
- LOG((" subtree has only one element!\n"));
- if (ki > 0 && n->kids[ki - 1]->elems[1])
- {
- /*
- * Child ki has only one element, but child
- * ki-1 has two or more. So we need to move a
- * subtree from ki-1 to ki.
- */
- trans234_subtree_right(n, ki - 1, &ki, &index);
- } else if (ki < 3 && n->kids[ki + 1] && n->kids[ki + 1]->elems[1])
- {
- /*
- * Child ki has only one element, but ki+1 has
- * two or more. Move a subtree from ki+1 to ki.
- */
- trans234_subtree_left(n, ki + 1, &ki, &index);
- } else
- {
- /*
- * ki is small with only small neighbours. Pick a
- * neighbour and merge with it.
- */
- trans234_subtree_merge(n, ki > 0 ? ki - 1 : ki, &ki, &index);
- sub = n->kids[ki]; /* ki may have been adjusted by the merge */
-
- if (!n->elems[0])
- {
- /*
- * The root is empty and needs to be
- * removed.
- */
- LOG((" shifting root!\n"));
- t->root = sub;
- sub->parent = NULL;
- sfree(n);
- n = NULL;
- }
- }
- }
-
- if (n) /* n is NULL when the old root was freed just above */
- n->counts[ki]--;
- n = sub;
- }
-
- /*
- * Now n is a leaf node, and ki marks the element number we
- * want to delete. We've already arranged for the leaf to be
- * bigger than minimum size, so let's just go to it.
- */
- assert(!n->kids[0]);
- if (!retval)
- retval = n->elems[ki];
-
- for (i = ki; i < 2 && n->elems[i + 1]; i++)
- n->elems[i] = n->elems[i + 1];
- n->elems[i] = NULL;
-
- /*
- * It's just possible that we have reduced the leaf to zero
- * size. This can only happen if it was the root - so destroy
- * it and make the tree empty.
- */
- if (!n->elems[0])
- {
- LOG((" removed last element in tree, destroying empty root\n"));
- assert(n == t->root);
- sfree(n);
- t->root = NULL;
- }
-
- return retval; /* finished! */
-}
-
-/*
- * Remove the element at position `index' and return it. A position
- * outside the tree yields NULL and leaves the tree alone.
- */
-void *delpos234(tree234 * t, int index)
-{
-    if (index >= 0 && index < countnode234(t->root))
-        return delpos234_internal(t, index);
-
-    return NULL;
-}
-
-/*
- * Look e up with REL234_EQ and, if the lookup succeeds, delete the
- * element at the position it reported. Returns whatever the
- * positional delete returns, or NULL when the lookup fails.
- */
-void *del234(tree234 * t, void *e)
-{
-    int pos;
-
-    if (findrelpos234(t, e, NULL, REL234_EQ, &pos))
-        return delpos234_internal(t, pos);
-
-    return NULL;                /* it wasn't in there anyway */
-}
-
-/*
- * Join two subtrees together with a separator element between
- * them, given their relative height.
- *
- * (Height<0 means the left tree is shorter, >0 means the right
- * tree is shorter, =0 means (duh) they're equal.)
- *
- * It is assumed that any checks needed on the ordering criterion
- * have _already_ been done.
- *
- * The value returned in `height' is 0 or 1 depending on whether the
- * resulting tree is the same height as the original larger one, or
- * one higher.
- *
- * `height' is therefore both an input (the relative height) and an
- * output. The function's return value is the root of the joined
- * tree.
- */
-static node234 *join234_internal(node234 * left, void *sep,
- node234 * right, int *height)
-{
- node234 *root, *node;
- int relht = *height;
- int ki;
-
- LOG((" join: joining %p \"%s\" %p, relative height is %d\n",
- left, sep, right, relht));
- if (relht == 0)
- {
- /*
- * The trees are the same height. Create a new one-element
- * root containing the separator and pointers to the two
- * nodes.
- */
- node234 *newroot;
- newroot = mknew(node234);
- newroot->kids[0] = left;
- newroot->counts[0] = countnode234(left);
- newroot->elems[0] = sep;
- newroot->kids[1] = right;
- newroot->counts[1] = countnode234(right);
- newroot->elems[1] = NULL;
- newroot->kids[2] = NULL;
- newroot->counts[2] = 0;
- newroot->elems[2] = NULL;
- newroot->kids[3] = NULL;
- newroot->counts[3] = 0;
- newroot->parent = NULL;
- if (left) /* either side may be an empty (NULL) subtree */
- left->parent = newroot;
- if (right)
- right->parent = newroot;
- *height = 1;
- LOG((" join: same height, brand new root\n"));
- return newroot;
- }
-
- /*
- * This now works like the addition algorithm on the larger
- * tree. We're replacing a single kid pointer with two kid
- * pointers separated by an element; if that causes the node to
- * overload, we split it in two, move a separator element up to
- * the next node, and repeat.
- */
- if (relht < 0)
- {
- /*
- * Left tree is shorter. Search down the right tree to find
- * the pointer we're inserting at.
- */
- node = root = right;
- while (++relht < 0)
- { /* follow the leftmost edge */
- node = node->kids[0];
- }
- ki = 0;
- right = node->kids[ki]; /* the kid that will sit to the right of sep */
- } else
- {
- /*
- * Right tree is shorter; search down the left to find the
- * pointer we're inserting at.
- */
- node = root = left;
- while (--relht > 0)
- { /* follow the rightmost occupied kid at each level */
- if (node->elems[2])
- node = node->kids[3];
- else if (node->elems[1])
- node = node->kids[2];
- else
- node = node->kids[1];
- }
- if (node->elems[2])
- ki = 3;
- else if (node->elems[1])
- ki = 2;
- else
- ki = 1;
- left = node->kids[ki]; /* the kid that will sit to the left of sep */
- }
-
- /*
- * Now proceed as for addition.
- */
- *height = add234_insert(left, sep, right, &root, node, ki);
-
- return root;
-}
-
-/*
- * Count the node levels of a tree by following kids[0] from the
- * root until it runs out. An empty tree has height 0.
- */
-static int height234(tree234 * t)
-{
-    node234 *cur;
-    int depth = 0;
-
-    for (cur = t->root; cur != NULL; cur = cur->kids[0])
-        depth++;
-
-    return depth;
-}
-
-/*
- * Join t2 onto the end of t1 and return t1; t2 is left with a NULL
- * root. If t2 holds no elements, t1 is returned untouched. When t1
- * has a comparison function, the first element of t2 is looked up
- * in t1 with REL234_GE, and a successful lookup makes the join fail
- * with NULL before either tree is modified.
- */
-tree234 *join234(tree234 * t1, tree234 * t2)
-{
-    void *sep;
-    int relht;
-
-    if (countnode234(t2->root) <= 0)
-        return t1;              /* nothing to append */
-
-    if (t1->cmp) {
-        sep = index234(t2, 0);
-        if (findrelpos234(t1, sep, NULL, REL234_GE, NULL))
-            return NULL;
-    }
-
-    /* The first element of t2 becomes the separator of the join. */
-    sep = delpos234(t2, 0);
-    relht = height234(t1) - height234(t2);
-    t1->root = join234_internal(t1->root, sep, t2->root, &relht);
-    t2->root = NULL;
-
-    return t1;
-}
-
-/*
- * Join t1 onto the front of t2 and return t2; t1 is left with a
- * NULL root. If t1 holds no elements, t2 is returned untouched.
- * When t2 has a comparison function, the last element of t1 is
- * looked up in t2 with REL234_LE, and a successful lookup makes the
- * join fail with NULL before either tree is modified.
- */
-tree234 *join234r(tree234 * t1, tree234 * t2)
-{
-    void *sep;
-    int relht;
-    int n1 = countnode234(t1->root);
-
-    if (n1 <= 0)
-        return t2;              /* nothing to prepend */
-
-    if (t2->cmp) {
-        sep = index234(t1, n1 - 1);
-        if (findrelpos234(t2, sep, NULL, REL234_LE, NULL))
-            return NULL;
-    }
-
-    /* The last element of t1 becomes the separator of the join. */
-    sep = delpos234(t1, n1 - 1);
-    relht = height234(t1) - height234(t2);
-    t2->root = join234_internal(t1->root, sep, t2->root, &relht);
-    t1->root = NULL;
-
-    return t2;
-}
-
-/*
- * Split out the first <index> elements in a tree and return a
- * pointer to the root node. Leave the root node of the remainder
- * in t.
- *
- * Works in two passes: a descent that cuts every node on the path
- * to the split point in two, followed by a repair walk down the cut
- * edge of each half that removes or enlarges undersized nodes. No
- * range check is done on index here.
- */
-static node234 *split234_internal(tree234 * t, int index)
-{
- node234 *halves[2], *n, *sib, *sub;
- node234 *lparent, *rparent;
- int ki, pki=0, i, half, lcount, rcount;
-
- n = t->root;
- LOG(("splitting tree %p at point %d\n", t, index));
-
- /*
- * Easy special cases. After this we have also dealt completely
- * with the empty-tree case and we can assume the root exists.
- */
- if (index == 0) /* return nothing */
- return NULL;
- if (index == countnode234(t->root))
- { /* return the whole tree */
- node234 *ret = t->root;
- t->root = NULL;
- return ret;
- }
-
- /*
- * Search down the tree to find the split point.
- */
- lparent = rparent = NULL;
- while (n)
- {
- LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index));
- lcount = index; /* elements of this subtree that go to the left half */
- rcount = countnode234(n) - lcount; /* ...and to the right half */
- if (index <= n->counts[0])
- {
- ki = 0;
- } else if (index -= n->counts[0] + 1, index <= n->counts[1])
- {
- ki = 1;
- } else if (index -= n->counts[1] + 1, index <= n->counts[2])
- {
- ki = 2;
- } else
- {
- index -= n->counts[2] + 1;
- ki = 3;
- }
-
- LOG((" splitting at subtree %d\n", ki));
- sub = n->kids[ki];
-
- LOG((" splitting at child index %d\n", ki));
-
- /*
- * Split the node, put halves[0] on the right of the left
- * one and halves[1] on the left of the right one, put the
- * new node pointers in halves[0] and halves[1], and go up
- * a level.
- */
- sib = mknew(node234); /* sib->kids[0] and counts[0] are not set here; they are filled in on the next iteration or after the loop */
- for (i = 0; i < 3; i++)
- {
- if (i + ki < 3 && n->elems[i + ki])
- { /* element ki+i and the kid after it move to sib */
- sib->elems[i] = n->elems[i + ki];
- sib->kids[i + 1] = n->kids[i + ki + 1];
- if (sib->kids[i + 1])
- sib->kids[i + 1]->parent = sib;
- sib->counts[i + 1] = n->counts[i + ki + 1];
- n->elems[i + ki] = NULL;
- n->kids[i + ki + 1] = NULL;
- n->counts[i + ki + 1] = 0;
- } else
- {
- sib->elems[i] = NULL;
- sib->kids[i + 1] = NULL;
- sib->counts[i + 1] = 0;
- }
- }
- if (lparent)
- { /* hook both pieces under the pieces made one level up */
- lparent->kids[pki] = n;
- lparent->counts[pki] = lcount;
- n->parent = lparent;
- rparent->kids[0] = sib;
- rparent->counts[0] = rcount;
- sib->parent = rparent;
- } else
- { /* first iteration: these are the two new roots */
- halves[0] = n;
- n->parent = NULL;
- halves[1] = sib;
- sib->parent = NULL;
- }
- lparent = n;
- rparent = sib;
- pki = ki;
- LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
- n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2],
- n->kids[3], n->counts[3]));
- LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n",
- sib, sib->kids[0], sib->counts[0], sib->elems[0], sib->kids[1],
- sib->counts[1], sib->elems[1], sib->kids[2], sib->counts[2],
- sib->elems[2], sib->kids[3], sib->counts[3]));
-
- n = sub;
- }
-
- /*
- * We've come off the bottom here, so we've successfully split
- * the tree into two equally high subtrees. The only problem is
- * that some of the nodes down the fault line will be smaller
- * than the minimum permitted size. (Since this is a 2-3-4
- * tree, that means they'll be zero-element one-child nodes.)
- */
- LOG((" fell off bottom, lroot is %p, rroot is %p\n",
- halves[0], halves[1]));
- lparent->counts[pki] = rparent->counts[0] = 0;
- lparent->kids[pki] = rparent->kids[0] = NULL;
-
- /*
- * So now we go back down the tree from each of the two roots,
- * fixing up undersize nodes.
- */
- for (half = 0; half < 2; half++)
- {
- /*
- * Remove the root if it's undersize (it will contain only
- * one child pointer, so just throw it away and replace it
- * with its child). This might happen several times.
- */
- while (halves[half] && !halves[half]->elems[0])
- {
- LOG((" root %p is undersize, throwing away\n", halves[half]));
- halves[half] = halves[half]->kids[0];
- sfree(halves[half]->parent); /* the old root, reached through its child */
- halves[half]->parent = NULL;
- LOG((" new root is %p\n", halves[half]));
- }
-
- n = halves[half];
- while (n)
- {
- void (*toward) (node234 * n, int ki, int *k, int *index);
- int ni, merge;
-
- /*
- * Now we have a potentially undersize node on the
- * right (if half==0) or left (if half==1). Sort it
- * out, by merging with a neighbour or by transferring
- * subtrees over. At this time we must also ensure that
- * nodes are bigger than minimum, in case we need an
- * element to merge two nodes below.
- */
- LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", n,
- n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1],
- n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3],
- n->counts[3]));
- if (half == 1)
- {
- ki = 0; /* the kid we're interested in */
- ni = 1; /* the neighbour */
- merge = 0; /* for merge: leftmost of the two */
- toward = trans234_subtree_left;
- } else
- {
- ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1);
- ni = ki - 1;
- merge = ni;
- toward = trans234_subtree_right;
- }
-
- sub = n->kids[ki];
- if (sub && !sub->elems[1])
- {
- /*
- * This node is undersized or minimum-size. If we
- * can merge it with its neighbour, we do so;
- * otherwise we must be able to transfer subtrees
- * over to it until it is greater than minimum
- * size.
- */
- int undersized = (!sub->elems[0]);
- LOG((" child %d is %ssize\n", ki,
- undersized ? "under" : "minimum-"));
- LOG((" neighbour is %s\n",
- n->kids[ni]->elems[2] ? "large" :
- n->kids[ni]->elems[1] ? "medium" : "small"));
- if (!n->kids[ni]->elems[1] ||
- (undersized && !n->kids[ni]->elems[2]))
- {
- /*
- * Neighbour is small, or possibly neighbour is
- * medium and we are undersize.
- */
- trans234_subtree_merge(n, merge, NULL, NULL);
- sub = n->kids[merge];
- if (!n->elems[0])
- {
- /*
- * n is empty, and hence must have been the
- * root and needs to be removed.
- */
- assert(!n->parent);
- LOG((" shifting root!\n"));
- halves[half] = sub;
- halves[half]->parent = NULL;
- sfree(n);
- }
- } else
- {
- /* Neighbour is big enough to move trees over. */
- toward(n, ni, NULL, NULL);
- if (undersized) /* an empty node needs a second transfer to exceed minimum size */
- toward(n, ni, NULL, NULL);
- }
- }
- n = sub;
- }
- }
-
- t->root = halves[1];
- return halves[0];
-}
-
-/*
- * Split t at position `index' and hand one part back as a newly
- * created tree that shares t's comparison function. With `before'
- * non-zero the new tree gets the first `index' elements and t
- * keeps the rest; otherwise t keeps the first `index' elements and
- * the new tree gets the rest. An index below zero or above the
- * element count returns NULL.
- */
-tree234 *splitpos234(tree234 * t, int index, int before)
-{
-    tree234 *result;
-    node234 *front;
-    int total = countnode234(t->root);
-
-    if (index < 0 || index > total)
-        return NULL;            /* error */
-
-    result = newtree234(t->cmp);
-    front = split234_internal(t, index);
-
-    if (!before) {
-        /* Caller wants the tail: swap the two halves around. */
-        result->root = t->root;
-        t->root = front;
-    } else {
-        result->root = front;
-    }
-
-    return result;
-}
-
-/*
- * Split t around element e. REL234_GT and REL234_GE are rewritten
- * to REL234_LE and REL234_LT respectively and select the `before'
- * mode of splitpos234; the other relations are used as given with
- * `before' off. REL234_EQ is not allowed. The split position is one
- * past the index reported by findrelpos234.
- *
- * NOTE(review): when the lookup fails the index is forced to 0, so
- * the split is made at position 1 - confirm that this is the
- * intended split point for the "nothing matched" case.
- */
-tree234 *split234(tree234 * t, void *e, cmpfn234 cmp, int rel)
-{
-    int pos;
-    int want_front = 0;
-
-    assert(rel != REL234_EQ);
-
-    if (rel == REL234_GT) {
-        want_front = 1;
-        rel = REL234_LE;
-    } else if (rel == REL234_GE) {
-        want_front = 1;
-        rel = REL234_LT;
-    }
-
-    if (!findrelpos234(t, e, cmp, rel, &pos))
-        pos = 0;
-
-    return splitpos234(t, pos + 1, want_front);
-}
-
-/*
- * Recursively duplicate node n and everything below it. Each
- * non-NULL element is passed through copyfn (with copyfnstate) when
- * a copy function is supplied; otherwise the element pointer is
- * shared with the original. The parent pointer of the returned node
- * is not set here; the parent pointers of its kids are.
- */
-static node234 *copynode234(node234 * n, copyfn234 copyfn,
-                            void *copyfnstate)
-{
-    node234 *dup = mknew(node234);
-    int slot;
-
-    /* Elements first, so copyfn sees them before any of the kids'. */
-    for (slot = 0; slot < 3; slot++) {
-        void *e = n->elems[slot];
-
-        if (e && copyfn)
-            e = copyfn(copyfnstate, e);
-        dup->elems[slot] = e;
-    }
-
-    for (slot = 0; slot < 4; slot++) {
-        node234 *kid = n->kids[slot];
-
-        if (!kid) {
-            dup->kids[slot] = NULL;
-        } else {
-            dup->kids[slot] = copynode234(kid, copyfn, copyfnstate);
-            dup->kids[slot]->parent = dup;
-        }
-        dup->counts[slot] = n->counts[slot];
-    }
-
-    return dup;
-}
-
-/*
- * Create a new tree with the same comparison function as t and a
- * copy of all its nodes. Elements are duplicated through copyfn
- * (called with copyfnstate) when copyfn is non-NULL, and shared
- * with the original otherwise.
- *
- * An empty source tree yields an empty copy: copynode234
- * dereferences the node it is given, so it must not be called with
- * a NULL root.
- */
-tree234 *copytree234(tree234 * t, copyfn234 copyfn, void *copyfnstate)
-{
-    tree234 *t2;
-
-    t2 = newtree234(t->cmp);
-    if (t->root) {
-        t2->root = copynode234(t->root, copyfn, copyfnstate);
-        t2->root->parent = NULL;
-    } else {
-        t2->root = NULL;
-    }
-
-    return t2;
-}
-
-#ifdef TEST
-
-/*
- * Test code for the 2-3-4 tree. This code maintains an alternative
- * representation of the data in the tree, in an array (using the
- * obvious and slow insert and delete functions). After each tree
- * operation, the verify() function is called, which ensures all
- * the tree properties are preserved:
- * - node->child->parent always equals node
- * - tree->root->parent always equals NULL
- * - number of kids == 0 or number of elements + 1;
- * - tree has the same depth everywhere
- * - every node has at least one element
- * - subtree element counts are accurate
- * - any NULL kid pointer is accompanied by a zero count
- * - in a sorted tree: ordering property between elements of a
- * node and elements of its children is preserved
- * and also ensures the list represented by the tree is the same
- * list it should be. (This last check also doubly verifies the
- * ordering properties, because the `same list it should be' is by
- * definition correctly ordered. It also ensures all nodes are
- * distinct, because the enum functions would get caught in a loop
- * if not.)
- */
-
-#include <stdarg.h>
-
-#define srealloc realloc
-
-/*
- * Error reporting function: writes "ERROR: ", then the message
- * built from the printf-style format and arguments, then a newline,
- * all to standard output.
- */
-void error(char *fmt, ...)
-{
-    va_list args;
-
-    fputs("ERROR: ", stdout);
-    va_start(args, fmt);
-    vprintf(fmt, args);
-    va_end(args);
-    putchar('\n');
-}
-
-/* The array representation of the data. */
-void **array;
-int arraylen, arraysize;
-cmpfn234 cmp;
-
-/* The tree representation of the same data. */
-tree234 *tree;
-
-/*
- * Routines to provide a diagnostic printout of a tree. Currently
- * relies on every element in the tree being a one-character string
- * :-)
- */
-typedef struct {
- char **levels; /* one text row buffer per display level; row 0 is printed last */
-} dispctx;
-
-int dispnode(node234 * n, int level, dispctx * ctx) /* draws node n on row `level' and returns the column it is centred on */
-{
- if (level == 0)
- { /* bottom row: append this node's elements to row 0 */
- int xpos = strlen(ctx->levels[0]);
- int len;
-
- if (n->elems[2])
- len = sprintf(ctx->levels[0] + xpos, " %s%s%s",
- n->elems[0], n->elems[1], n->elems[2]);
- else if (n->elems[1])
- len = sprintf(ctx->levels[0] + xpos, " %s%s",
- n->elems[0], n->elems[1]);
- else
- len = sprintf(ctx->levels[0] + xpos, " %s", n->elems[0]);
- return xpos + 1 + (len - 1) / 2; /* skip the leading space, then half of what follows it */
- } else
- {
- int xpos[4], nkids;
- int nodelen, mypos, myleft, x, i;
-
- /* each tree level uses three rows: the node itself plus two rows of connectors */
- xpos[0] = dispnode(n->kids[0], level - 3, ctx);
- xpos[1] = dispnode(n->kids[1], level - 3, ctx);
- nkids = 2;
- if (n->kids[2])
- {
- xpos[2] = dispnode(n->kids[2], level - 3, ctx);
- nkids = 3;
- }
- if (n->kids[3])
- {
- xpos[3] = dispnode(n->kids[3], level - 3, ctx);
- nkids = 4;
- }
-
- if (nkids == 4)
- mypos = (xpos[1] + xpos[2]) / 2;
- else if (nkids == 3)
- mypos = xpos[1];
- else
- mypos = (xpos[0] + xpos[1]) / 2;
- nodelen = nkids * 2 - 1; /* width of the ".a.b." text, given one-character elements */
- myleft = mypos - ((nodelen - 1) / 2);
- assert(myleft >= xpos[0]);
- assert(myleft + nodelen - 1 <= xpos[nkids - 1]);
-
- x = strlen(ctx->levels[level]);
- while (x <= xpos[0] && x < myleft)
- ctx->levels[level][x++] = ' ';
- while (x < myleft)
- ctx->levels[level][x++] = '_';
- if (nkids == 4)
- x += sprintf(ctx->levels[level] + x, ".%s.%s.%s.",
- n->elems[0], n->elems[1], n->elems[2]);
- else if (nkids == 3)
- x += sprintf(ctx->levels[level] + x, ".%s.%s.",
- n->elems[0], n->elems[1]);
- else
- x += sprintf(ctx->levels[level] + x, ".%s.", n->elems[0]);
- while (x < xpos[nkids - 1])
- ctx->levels[level][x++] = '_';
- ctx->levels[level][x] = '\0';
-
- x = strlen(ctx->levels[level - 1]); /* first connector row, just below the node */
- for (i = 0; i < nkids; i++)
- {
- int rpos, pos;
- rpos = xpos[i];
- if (i > 0 && i < nkids - 1)
- pos = myleft + 2 * i;
- else
- pos = rpos;
- if (rpos < pos)
- rpos++;
- while (x < pos && x < rpos)
- ctx->levels[level - 1][x++] = ' ';
- if (x == pos)
- ctx->levels[level - 1][x++] = '|';
- while (x < pos || x < rpos)
- ctx->levels[level - 1][x++] = '_';
- if (x == pos)
- ctx->levels[level - 1][x++] = '|';
- }
- ctx->levels[level - 1][x] = '\0';
-
- x = strlen(ctx->levels[level - 2]); /* second connector row: a bar above each kid */
- for (i = 0; i < nkids; i++)
- {
- int rpos = xpos[i];
-
- while (x < rpos)
- ctx->levels[level - 2][x++] = ' ';
- ctx->levels[level - 2][x++] = '|';
- }
- ctx->levels[level - 2][x] = '\0';
-
- return mypos;
- }
-}
-
-/*
- * Print a text picture of tree t on standard output. An empty tree
- * prints "[empty tree]" and nothing else.
- *
- * Every tree level occupies three text rows (the node row and two
- * connector rows) except the bottom one, hence height * 3 - 2 rows.
- * The rows are filled in by dispnode and printed from the highest
- * row index down to row 0.
- */
-void disptree(tree234 * t)
-{
-    dispctx ctx;
-    char *leveldata;
-    int width = count234(t);
-    int ht = height234(t) * 3 - 2;
-    int i;
-
-    if (!t->root) {
-        printf("[empty tree]\n");
-        /*
-         * Stop here: with no root, ht is negative and dispnode
-         * would be handed a NULL node.
-         */
-        return;
-    }
-
-    leveldata = smalloc(ht * (width + 2));
-    ctx.levels = smalloc(ht * sizeof(char *));
-    for (i = 0; i < ht; i++) {
-        ctx.levels[i] = leveldata + i * (width + 2);
-        ctx.levels[i][0] = '\0';
-    }
-
-    (void) dispnode(t->root, ht - 1, &ctx);
-
-    for (i = ht; i--;)
-        printf("%s\n", ctx.levels[i]);
-
-    sfree(ctx.levels);
-    sfree(leveldata);
-}
-
-typedef struct {
- int treedepth; /* level at which leaves were found; negative until the first leaf is seen */
- int elemcount; /* running total of elements over all nodes visited */
-} chkctx;
-
-int
-chknode(chkctx * ctx, int level, node234 * node,
- void *lowbound, void *highbound)
-{
- int nkids, nelems;
- int i;
- int count; /* elements in this node plus all its subtrees; this is the return value */
-
- /* Count the non-NULL kids. */
- for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
- /* Ensure no kids beyond the first NULL are non-NULL. */
- for (i = nkids; i < 4; i++)
- if (node->kids[i])
- {
- error("node %p: nkids=%d but kids[%d] non-NULL", node, nkids, i);
- } else if (node->counts[i])
- {
- error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
- node, i, i, node->counts[i]);
- }
-
- /* Count the non-NULL elements. */
- for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
- /* Ensure no elements beyond the first NULL are non-NULL. */
- for (i = nelems; i < 3; i++)
- if (node->elems[i])
- {
- error("node %p: nelems=%d but elems[%d] non-NULL", node, nelems, i);
- }
-
- if (nkids == 0)
- {
- /*
- * If nkids==0, this is a leaf node; verify that the tree
- * depth is the same everywhere.
- */
- if (ctx->treedepth < 0)
- ctx->treedepth = level; /* we didn't know the depth yet */
- else if (ctx->treedepth != level)
- error("node %p: leaf at depth %d, previously seen depth %d",
- node, level, ctx->treedepth);
- } else
- {
- /*
- * If nkids != 0, then it should be nelems+1, unless nelems
- * is 0 in which case nkids should also be 0 (and so we
- * shouldn't be in this condition at all).
- */
- int shouldkids = (nelems ? nelems + 1 : 0);
- if (nkids != shouldkids)
- {
- error("node %p: %d elems should mean %d kids but has %d",
- node, nelems, shouldkids, nkids);
- }
- }
-
- /*
- * nelems should be at least 1.
- */
- if (nelems == 0)
- {
- error("node %p: no elems", node, nkids); /* NOTE(review): nkids has no matching conversion in the format string */
- }
-
- /*
- * Add nelems to the running element count of the whole tree.
- */
- ctx->elemcount += nelems;
-
- /*
- * Check ordering property: all elements should be strictly >
- * lowbound, strictly < highbound, and strictly < each other in
- * sequence. (lowbound and highbound are NULL at edges of tree
- * - both NULL at root node - and NULL is considered to be <
- * everything and > everything. IYSWIM.)
- */
- if (cmp)
- { /* uses the file-scope cmp, so unsorted trees skip this check */
- for (i = -1; i < nelems; i++)
- {
- void *lower = (i == -1 ? lowbound : node->elems[i]);
- void *higher = (i + 1 == nelems ? highbound : node->elems[i + 1]);
- if (lower && higher && cmp(lower, higher) >= 0)
- {
- error("node %p: kid comparison [%d=%s,%d=%s] failed",
- node, i, lower, i + 1, higher);
- }
- }
- }
-
- /*
- * Check parent pointers: all non-NULL kids should have a
- * parent pointer coming back to this node.
- */
- for (i = 0; i < nkids; i++)
- if (node->kids[i]->parent != node)
- {
- error("node %p kid %d: parent ptr is %p not %p",
- node, i, node->kids[i]->parent, node);
- }
-
-
- /*
- * Now (finally!) recurse into subtrees.
- */
- count = nelems;
-
- for (i = 0; i < nkids; i++)
- {
- void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
- void *higher = (i >= nelems ? highbound : node->elems[i]);
- int subcount = chknode(ctx, level + 1, node->kids[i], lower, higher);
- if (node->counts[i] != subcount)
- {
- error("node %p kid %d: count says %d, subtree really has %d",
- node, i, node->counts[i], subcount);
- }
- count += subcount;
- }
-
- return count;
-}
-
-/*
- * Check a tree against the array that is supposed to mirror it.
- * First the structural properties are validated with chknode (and
- * the leaf depth is printed), then the tree is enumerated with
- * index234 and compared position by position with the array, and
- * finally the element totals from chknode, the enumeration and
- * count234 are cross-checked. Every discrepancy is reported through
- * error(); nothing is returned.
- */
-void verifytree(tree234 * tree, void **array, int arraylen)
-{
-    chkctx ctx;
-    int i;
-    void *p;
-
-    ctx.treedepth = -1;         /* depth unknown yet */
-    ctx.elemcount = 0;          /* no elements seen yet */
-    /*
-     * Verify validity of tree properties.
-     */
-    if (tree->root) {
-        if (tree->root->parent != NULL)
-            error("root->parent is %p should be null", tree->root->parent);
-        chknode(&ctx, 0, tree->root, NULL, NULL);
-    }
-    printf("tree depth: %d\n", ctx.treedepth);
-    /*
-     * Enumerate the tree and ensure it matches up to the array.
-     */
-    for (i = 0; NULL != (p = index234(tree, i)); i++) {
-        if (i >= arraylen) {
-            /*
-             * The array has no entry at this position, so there
-             * is nothing valid to compare against; reading
-             * array[i] here would run past the stored data.
-             */
-            error("tree contains more than %d elements", arraylen);
-        } else if (array[i] != p) {
-            error("enum at position %d: array says %s, tree says %s",
-                  i, array[i], p);
-        }
-    }
-    if (ctx.elemcount != i) {
-        error("tree really contains %d elements, enum gave %d",
-              ctx.elemcount, i);
-    }
-    if (i < arraylen) {
-        error("enum gave only %d elements, array has %d", i, arraylen);
-    }
-    i = count234(tree);
-    if (ctx.elemcount != i) {
-        error("tree really contains %d elements, count234 gave %d",
-              ctx.elemcount, i);
-    }
-}
-void verify(void) /* checks the file-scope tree against the file-scope array */
-{
- verifytree(tree, array, arraylen);
-}
-
-/*
- * Mirror a successful insertion in the reference array: grow the
- * array if it is full, open a gap at `index', store elem there, and
- * report an error if the tree call returned anything other than
- * elem. Finishes by verifying the tree against the array.
- */
-void internal_addtest(void *elem, int index, void *realret)
-{
-    int slot;
-
-    if (arraylen + 1 > arraysize) {
-        arraysize = arraylen + 1 + 256;
-        if (array == NULL)
-            array = smalloc(arraysize * sizeof(*array));
-        else
-            array = srealloc(array, arraysize * sizeof(*array));
-    }
-
-    /* Shift everything from `index' onwards up by one place. */
-    for (slot = arraylen; slot > index; slot--)
-        array[slot] = array[slot - 1];
-    array[index] = elem;
-    arraylen++;
-
-    /* A successful add is expected to hand back elem itself. */
-    if (realret != elem)
-        error("add: retval was %p expected %p", realret, elem);
-
-    verify();
-}
-
-/*
- * Add elem to the sorted tree and check the result against the
- * reference array. If the array already holds an element that
- * compares equal, the tree is expected to return that existing
- * element and the array is left alone; otherwise the insertion is
- * mirrored through internal_addtest.
- */
-void addtest(void *elem)
-{
-    void *got = add234(tree, elem);
-    int pos;
-
-    /* Find the first array entry that is not less than elem. */
-    for (pos = 0; pos < arraylen && cmp(elem, array[pos]) > 0; pos++)
-        continue;
-
-    if (pos >= arraylen || cmp(elem, array[pos]) != 0) {
-        internal_addtest(elem, pos, got);
-        return;
-    }
-
-    /* Duplicate: expect the existing element back, not elem. */
-    if (got != array[pos])
-        error("add: retval was %p expected %p", got, array[pos]);
-}
-
-/*
- * Insert elem at position i with addpos234 and mirror the insertion
- * in the reference array.
- */
-void addpostest(void *elem, int i)
-{
-    internal_addtest(elem, i, addpos234(tree, elem, i));
-}
-
-/*
- * Delete the element at array position i from both the reference
- * array and the tree. A tree with a comparison function is asked to
- * delete by element, one without by position. The tree must return
- * the element that was removed from the array.
- */
-void delpostest(int i)
-{
-    void *victim = array[i];
-    void *got;
-    int j;
-
-    /* Close the gap in the array. */
-    for (j = i; j < arraylen - 1; j++)
-        array[j] = array[j + 1];
-    arraylen--;
-
-    got = tree->cmp ? del234(tree, victim) : delpos234(tree, i);
-
-    if (got != victim)
-        error("del returned %p, expected %p", got, victim);
-
-    verify();
-}
-
-/*
- * Delete elem if the reference array contains an element that
- * compares equal to it; otherwise do nothing.
- */
-void deltest(void *elem)
-{
-    int pos;
-
-    for (pos = 0; pos < arraylen && cmp(elem, array[pos]) > 0; pos++)
-        continue;
-
-    if (pos < arraylen && cmp(elem, array[pos]) == 0)
-        delpostest(pos);
-}
-
-/* A sample data set and test utility. Designed for pseudo-randomness,
- * and yet repeatability. */
-
-/*
- * This random number generator uses the `portable implementation'
- * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
- * change it if not.
- *
- * The seed is advanced in place and bits 16..30 of the new seed are
- * returned, giving a value in the range 0..32767.
- */
-int randomnumber(unsigned *seed)
-{
-    unsigned next = *seed * 1103515245u + 12345u;
-
-    *seed = next;
-    return (int) ((next >> 16) & 0x7fffu);
-}
-
-/*
- * Comparison function for the test data: treats both arguments as
- * NUL-terminated strings and orders them with strcmp.
- */
-int mycmp(void *av, void *bv)
-{
-    return strcmp((char const *) av, (char const *) bv);
-}
-
-/* Element count of a true array (must not be applied to a pointer). */
-#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
-
-/*
- * Test vocabulary: 60 distinct one-character strings ("0"-"9",
- * "A"-"Z", "a"-"x") listed in scrambled order. The longer words
- * inside the #if 0 section are compiled out.
- */
-char *strings[] = {
- "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i",
- "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E",
- "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u",
- "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y",
- "m", "s", "l", "4",
-#if 0
- "a", "ab", "absque", "coram", "de",
- "palam", "clam", "cum", "ex", "e",
- "sine", "tenus", "pro", "prae",
- "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
- "penguin", "blancmange", "pangolin", "whale", "hedgehog",
- "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
- "murfl", "spoo", "breen", "flarn", "octothorpe",
- "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
- "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
- "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
- "wand", "ring", "amulet"
-#endif
-};
-
-/* Number of entries in strings[]; has type size_t, so callers cast it. */
-#define NSTR lenof(strings)
-
-/*
- * Cross-check findrelpos234() against the sorted reference array.
- *
- * For every entry of strings[] and every search relation (EQ, GE, LE,
- * LT, GT) the expected element and index are worked out by binary
- * search over array[0..arraylen-1]; the tree's answer must match both.
- * For EQ hits, index234() is additionally required to map the returned
- * index back to the same element. Finally the NULL-key forms of GT
- * and LT are checked: they must yield the first and last element
- * respectively, or NULL when the array is empty.
- *
- * Mismatches are reported through error().
- */
-void findtest(void)
-{
-    static const int rels[] = {
-        REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
-    };
-    static const char *const relnames[] = {
-        "EQ", "GE", "LE", "LT", "GT"
-    };
-    int i, j, rel, index;
-    char *p, *ret, *realret, *realret2;
-    int lo, hi, mid, c;
-
-    for (i = 0; i < (int) NSTR; i++)
-    {
-        p = strings[i];
-        for (j = 0; j < (int) (sizeof(rels) / sizeof(*rels)); j++)
-        {
-            rel = rels[j];
-
-            /*
-             * Binary search for p in the reference array. mid and c
-             * are given defined values first: with an empty array the
-             * loop body never runs, and c must then read as `not
-             * found' (non-zero) rather than be left uninitialised.
-             */
-            lo = 0;
-            hi = arraylen - 1;
-            mid = 0;
-            c = 1;
-            while (lo <= hi)
-            {
-                mid = (lo + hi) / 2;
-                c = strcmp(p, array[mid]);
-                if (c < 0)
-                    hi = mid - 1;
-                else if (c > 0)
-                    lo = mid + 1;
-                else
-                    break;
-            }
-
-            if (c == 0)
-            {
-                /* Exact hit at mid: LT/GT want the neighbour instead. */
-                if (rel == REL234_LT)
-                    ret = (mid > 0 ? array[--mid] : NULL);
-                else if (rel == REL234_GT)
-                    ret = (mid < arraylen - 1 ? array[++mid] : NULL);
-                else
-                    ret = array[mid];
-            } else
-            {
-                /* Miss: hi is the last smaller slot, lo the first larger. */
-                assert(lo == hi + 1);
-                if (rel == REL234_LT || rel == REL234_LE)
-                {
-                    mid = hi;
-                    ret = (hi >= 0 ? array[hi] : NULL);
-                } else if (rel == REL234_GT || rel == REL234_GE)
-                {
-                    mid = lo;
-                    ret = (lo < arraylen ? array[lo] : NULL);
-                } else
-                    ret = NULL;
-            }
-
-            realret = findrelpos234(tree, p, NULL, rel, &index);
-            if (realret != ret)
-            {
-                error("find(\"%s\",%s) gave %s should be %s",
-                      p, relnames[j], realret, ret);
-            }
-            /* index is only inspected when the lookup returned something */
-            if (realret && index != mid)
-            {
-                error("find(\"%s\",%s) gave %d should be %d",
-                      p, relnames[j], index, mid);
-            }
-            if (realret && rel == REL234_EQ)
-            {
-                realret2 = index234(tree, index);
-                if (realret2 != realret)
-                {
-                    error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
-                          p, relnames[j], realret, index, index, realret2);
-                }
-            }
-#if 0
-            printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
-                   realret, index);
-#endif
-        }
-    }
-
-    /* NULL key with GT: expect the very first element. */
-    realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
-    if (arraylen && (realret != array[0] || index != 0))
-    {
-        error("find(NULL,GT) gave %s(%d) should be %s(0)",
-              realret, index, array[0]);
-    } else if (!arraylen && (realret != NULL))
-    {
-        error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
-    }
-
-    /* NULL key with LT: expect the very last element. */
-    realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
-    if (arraylen
-        && (realret != array[arraylen - 1] || index != arraylen - 1))
-    {
-        /* report the real expected index, not a hard-coded 0 */
-        error("find(NULL,LT) gave %s(%d) should be %s(%d)", realret, index,
-              array[arraylen - 1], arraylen - 1);
-    } else if (!arraylen && (realret != NULL))
-    {
-        error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
-    }
-}
-
-/*
- * Split-and-rejoin check. For every cut point 0..arraylen a copy of
- * tree is split with splitpos234(); the two halves are verified
- * against the matching slices of array, joined back together with
- * join234(), and the result verified against the whole array. All
- * temporary trees are freed; the tree passed in is only copied from.
- */
-void splittest(tree234 * tree, void **array, int arraylen)
-{
-    int cut = 0;
-
-    while (cut <= arraylen)
-    {
-        tree234 *head = copytree234(tree, NULL, NULL);
-        tree234 *tail = splitpos234(head, cut, 0);
-
-        verifytree(head, array, cut);
-        verifytree(tail, array + cut, arraylen - cut);
-
-        join234(head, tail);
-        freetree234(tail);          /* left empty by join */
-        verifytree(head, array, arraylen);
-        freetree234(head);
-
-        cut++;
-    }
-}
-
-/*
- * Test driver. Runs four phases against the global tree/array pair:
- *  1. 10000 random add/delete trials on a sorted tree, running
- *     findtest() after each one, then deletes whatever is left;
- *  2. 1000 random positional insertions into an unsorted tree,
- *     followed by split/join checks on a copy and a random teardown;
- *  3. split/join checks on small sorted trees of 1 to 16 elements;
- *  4. degenerate join cases (empty trees, ordering violations).
- * Returns 0 once all phases have run.
- */
-int main(void)
-{
- /* in[j] is non-zero while strings[j] is currently held in the tree */
- int in[NSTR];
- int i, j, k;
- int tworoot, tmplen;
- unsigned seed = 0;
- tree234 *tree2, *tree3, *tree4;
- /* NOTE(review): c is not referenced anywhere in this function */
- int c;
-
- /* line-buffer stdout */
- setvbuf(stdout, NULL, _IOLBF, 0);
-
- for (i = 0; i < (int) NSTR; i++)
- in[i] = 0;
- array = NULL;
- arraylen = arraysize = 0;
- tree = newtree234(mycmp);
- cmp = mycmp;
-
- verify();
- /* Phase 1: toggle a randomly chosen string in or out of the tree. */
- for (i = 0; i < 10000; i++)
- {
- j = randomnumber(&seed);
- j %= NSTR;
- printf("trial: %d\n", i);
- if (in[j])
- {
- printf("deleting %s (%d)\n", strings[j], j);
- deltest(strings[j]);
- in[j] = 0;
- } else
- {
- printf("adding %s (%d)\n", strings[j], j);
- addtest(strings[j]);
- in[j] = 1;
- }
- disptree(tree);
- findtest();
- }
-
- /* Drain the sorted tree by deleting randomly chosen survivors. */
- while (arraylen > 0)
- {
- j = randomnumber(&seed);
- j %= arraylen;
- deltest(array[j]);
- }
-
- freetree234(tree);
-
- /*
- * Now try an unsorted tree. We don't really need to test
- * delpos234 because we know del234 is based on it, so it's
- * already been tested in the above sorted-tree code; but for
- * completeness we'll use it to tear down our unsorted tree
- * once we've built it.
- */
- tree = newtree234(NULL);
- cmp = NULL;
- verify();
- for (i = 0; i < 1000; i++)
- {
- printf("trial: %d\n", i);
- j = randomnumber(&seed);
- j %= NSTR;
- k = randomnumber(&seed);
- /* any position from 0 up to and including the current count */
- k %= count234(tree) + 1;
- printf("adding string %s at index %d\n", strings[j], k);
- addpostest(strings[j], k);
- }
-
- /*
- * While we have this tree in its full form, we'll take a copy
- * of it to use in split and join testing.
- */
- tree2 = copytree234(tree, NULL, NULL);
- verifytree(tree2, array, arraylen); /* check the copy is accurate */
- /*
- * Split tests. Split the tree at every possible point and
- * check the resulting subtrees.
- */
- tworoot = (!tree2->root->elems[1]); /* see if it has a 2-root */
- splittest(tree2, array, arraylen);
- /*
- * Now do the split test again, but on a tree that has a 2-root
- * (if the previous one didn't) or doesn't (if the previous one
- * did).
- */
- tmplen = arraylen;
- /* delete from the end of the copy until the root's shape flips */
- while ((!tree2->root->elems[1]) == tworoot)
- {
- delpos234(tree2, --tmplen);
- }
- printf("now trying splits on second tree\n");
- splittest(tree2, array, tmplen);
- freetree234(tree2);
-
- /*
- * Back to the main testing of uncounted trees.
- */
- while (count234(tree) > 0)
- {
- printf("cleanup: tree size %d\n", count234(tree));
- j = randomnumber(&seed);
- j %= count234(tree);
- printf("deleting string %s from index %d\n", (char *) array[j], j);
- delpostest(j);
- }
- freetree234(tree);
-
- /*
- * Finally, do some testing on split/join on _sorted_ trees. At
- * the same time, we'll be testing split on very small trees.
- */
- tree = newtree234(mycmp);
- cmp = mycmp;
- arraylen = 0;
- for (i = 0; i < 16; i++)
- {
- addtest(strings[i]);
- tree2 = copytree234(tree, NULL, NULL);
- splittest(tree2, array, arraylen);
- freetree234(tree2);
- }
- freetree234(tree);
-
- /*
- * Test silly cases of join: join(emptytree, emptytree), and
- * also ensure join correctly spots when sorted trees fail the
- * ordering constraint.
- */
- tree = newtree234(mycmp);
- tree2 = newtree234(mycmp);
- tree3 = newtree234(mycmp);
- tree4 = newtree234(mycmp);
- assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */
- add234(tree2, strings[1]);
- add234(tree4, strings[0]);
- /* array[] is reused here purely as the expected contents for verifytree */
- array[0] = strings[0];
- array[1] = strings[1];
- verifytree(tree, array, 0);
- verifytree(tree2, array + 1, 1);
- verifytree(tree3, array, 0);
- verifytree(tree4, array, 1);
-
- /*
- * So:
- * - join(tree,tree3) should leave both tree and tree3 unchanged.
- * - joinr(tree,tree2) should leave both tree and tree2 unchanged.
- * - join(tree4,tree3) should leave both tree3 and tree4 unchanged.
- * - join(tree, tree2) should move the element from tree2 to tree.
- * - joinr(tree4, tree3) should move the element from tree4 to tree3.
- * - join(tree,tree3) should return NULL and leave both unchanged.
- * - join(tree3,tree) should work and create a bigger tree in tree3.
- */
- /* NOTE(review): these join calls sit inside assert(), so building with NDEBUG would remove them */
- assert(tree == join234(tree, tree3));
- verifytree(tree, array, 0);
- verifytree(tree3, array, 0);
- assert(tree2 == join234r(tree, tree2));
- verifytree(tree, array, 0);
- verifytree(tree2, array + 1, 1);
- assert(tree4 == join234(tree4, tree3));
- verifytree(tree3, array, 0);
- verifytree(tree4, array, 1);
- assert(tree == join234(tree, tree2));
- verifytree(tree, array + 1, 1);
- verifytree(tree2, array, 0);
- assert(tree3 == join234r(tree4, tree3));
- verifytree(tree3, array, 1);
- verifytree(tree4, array, 0);
- assert(NULL == join234(tree, tree3));
- verifytree(tree, array + 1, 1);
- verifytree(tree3, array, 1);
- assert(tree3 == join234(tree3, tree));
- verifytree(tree3, array, 2);
- verifytree(tree, array, 0);
-
- return 0;
-}
-
-#endif
-
-#if 0 /* sorted list of strings might be useful */
-{
-"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D",
- "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
- "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b",
- "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n",
- "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",}
-#endif
+/* + * tree234.c: reasonably generic counted 2-3-4 tree routines. + * + * This file is copyright 1999-2001 Simon Tatham. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "tree234.h" + +#define smalloc malloc +#define sfree free + +#define mknew(typ) ( (typ *) smalloc (sizeof (typ)) ) + +#ifdef TEST +#define LOG(x) (printf x) +#else +#define LOG(x) +#endif + +typedef struct node234_Tag node234; + +struct tree234_Tag { + node234 *root; + cmpfn234 cmp; +}; + +struct node234_Tag { + node234 *parent; + node234 *kids[4]; + int counts[4]; + void *elems[3]; +}; + +/* + * Create a 2-3-4 tree. + */ +tree234 *newtree234(cmpfn234 cmp) +{ + tree234 *ret = mknew(tree234); + LOG(("created tree %p\n", ret)); + ret->root = NULL; + ret->cmp = cmp; + return ret; +} + +/* + * Free a 2-3-4 tree (not including freeing the elements). 
+ */ +static void freenode234(node234 * n) +{ + if (!n) + return; + freenode234(n->kids[0]); + freenode234(n->kids[1]); + freenode234(n->kids[2]); + freenode234(n->kids[3]); + sfree(n); +} + +void freetree234(tree234 * t) +{ + freenode234(t->root); + sfree(t); +} + +/* + * Internal function to count a node. + */ +static int countnode234(node234 * n) +{ + int count = 0; + int i; + if (!n) + return 0; + for (i = 0; i < 4; i++) + count += n->counts[i]; + for (i = 0; i < 3; i++) + if (n->elems[i]) + count++; + return count; +} + +/* + * Count the elements in a tree. + */ +int count234(tree234 * t) +{ + if (t->root) + return countnode234(t->root); + else + return 0; +} + +/* + * Propagate a node overflow up a tree until it stops. Returns 0 or + * 1, depending on whether the root had to be split or not. + */ +static int +add234_insert(node234 * left, void *e, node234 * right, + node234 ** root, node234 * n, int ki) +{ + int lcount, rcount; + /* + * We need to insert the new left/element/right set in n at + * child position ki. + */ + lcount = countnode234(left); + rcount = countnode234(right); + while (n) + { + LOG((" at %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" need to insert %p/%d \"%s\" %p/%d at position %d\n", left, + lcount, e, right, rcount, ki)); + if (n->elems[1] == NULL) + { + /* + * Insert in a 2-node; simple. 
+ */ + if (ki == 0) + { + LOG((" inserting on left of 2-node\n")); + n->kids[2] = n->kids[1]; + n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; + n->counts[0] = lcount; + } else + { /* ki == 1 */ + LOG((" inserting on right of 2-node\n")); + n->kids[2] = right; + n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; + n->counts[1] = lcount; + } + if (n->kids[0]) + n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + if (n->kids[2]) + n->kids[2]->parent = n; + LOG((" done\n")); + break; + } else if (n->elems[2] == NULL) + { + /* + * Insert in a 3-node; simple. + */ + if (ki == 0) + { + LOG((" inserting on left of 3-node\n")); + n->kids[3] = n->kids[2]; + n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = n->kids[1]; + n->counts[2] = n->counts[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->counts[1] = rcount; + n->elems[0] = e; + n->kids[0] = left; + n->counts[0] = lcount; + } else if (ki == 1) + { + LOG((" inserting in middle of 3-node\n")); + n->kids[3] = n->kids[2]; + n->counts[3] = n->counts[2]; + n->elems[2] = n->elems[1]; + n->kids[2] = right; + n->counts[2] = rcount; + n->elems[1] = e; + n->kids[1] = left; + n->counts[1] = lcount; + } else + { /* ki == 2 */ + LOG((" inserting on right of 3-node\n")); + n->kids[3] = right; + n->counts[3] = rcount; + n->elems[2] = e; + n->kids[2] = left; + n->counts[2] = lcount; + } + if (n->kids[0]) + n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + if (n->kids[2]) + n->kids[2]->parent = n; + if (n->kids[3]) + n->kids[3]->parent = n; + LOG((" done\n")); + break; + } else + { + node234 *m = mknew(node234); + m->parent = n->parent; + LOG((" splitting a 4-node; created new node %p\n", m)); + /* + * Insert in a 4-node; split into a 2-node and a + * 3-node, and move focus up a level. 
+ * + * I don't think it matters which way round we put the + * 2 and the 3. For simplicity, we'll put the 3 first + * always. + */ + if (ki == 0) + { + m->kids[0] = left; + m->counts[0] = lcount; + m->elems[0] = e; + m->kids[1] = right; + m->counts[1] = rcount; + m->elems[1] = n->elems[0]; + m->kids[2] = n->kids[1]; + m->counts[2] = n->counts[1]; + e = n->elems[1]; + n->kids[0] = n->kids[2]; + n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } else if (ki == 1) + { + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = left; + m->counts[1] = lcount; + m->elems[1] = e; + m->kids[2] = right; + m->counts[2] = rcount; + e = n->elems[1]; + n->kids[0] = n->kids[2]; + n->counts[0] = n->counts[2]; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } else if (ki == 2) + { + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = left; + m->counts[2] = lcount; + /* e = e; */ + n->kids[0] = right; + n->counts[0] = rcount; + n->elems[0] = n->elems[2]; + n->kids[1] = n->kids[3]; + n->counts[1] = n->counts[3]; + } else + { /* ki == 3 */ + m->kids[0] = n->kids[0]; + m->counts[0] = n->counts[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + m->counts[1] = n->counts[1]; + m->elems[1] = n->elems[1]; + m->kids[2] = n->kids[2]; + m->counts[2] = n->counts[2]; + n->kids[0] = left; + n->counts[0] = lcount; + n->elems[0] = e; + n->kids[1] = right; + n->counts[1] = rcount; + e = n->elems[2]; + } + m->kids[3] = n->kids[3] = n->kids[2] = NULL; + m->counts[3] = n->counts[3] = n->counts[2] = 0; + m->elems[2] = n->elems[2] = n->elems[1] = NULL; + if (m->kids[0]) + m->kids[0]->parent = m; + if (m->kids[1]) + m->kids[1]->parent = m; + if (m->kids[2]) + m->kids[2]->parent = m; + if (n->kids[0]) + 
n->kids[0]->parent = n; + if (n->kids[1]) + n->kids[1]->parent = n; + LOG((" left (%p): %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", m, + m->kids[0], m->counts[0], m->elems[0], + m->kids[1], m->counts[1], m->elems[1], + m->kids[2], m->counts[2])); + LOG((" right (%p): %p/%d \"%s\" %p/%d\n", n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1])); + left = m; + lcount = countnode234(left); + right = n; + rcount = countnode234(right); + } + if (n->parent) + ki = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : n->parent->kids[2] == n ? 2 : 3); + n = n->parent; + } + + /* + * If we've come out of here by `break', n will still be + * non-NULL and all we need to do is go back up the tree + * updating counts. If we've come here because n is NULL, we + * need to create a new root for the tree because the old one + * has just split into two. */ + if (n) + { + while (n->parent) + { + int count = countnode234(n); + int childnum; + childnum = (n->parent->kids[0] == n ? 0 : + n->parent->kids[1] == n ? 1 : + n->parent->kids[2] == n ? 2 : 3); + n->parent->counts[childnum] = count; + n = n->parent; + } + return 0; /* root unchanged */ + } else + { + LOG((" root is overloaded, split into two\n")); + (*root) = mknew(node234); + (*root)->kids[0] = left; + (*root)->counts[0] = lcount; + (*root)->elems[0] = e; + (*root)->kids[1] = right; + (*root)->counts[1] = rcount; + (*root)->elems[1] = NULL; + (*root)->kids[2] = NULL; + (*root)->counts[2] = 0; + (*root)->elems[2] = NULL; + (*root)->kids[3] = NULL; + (*root)->counts[3] = 0; + (*root)->parent = NULL; + if ((*root)->kids[0]) + (*root)->kids[0]->parent = (*root); + if ((*root)->kids[1]) + (*root)->kids[1]->parent = (*root); + LOG((" new root is %p/%d \"%s\" %p/%d\n", + (*root)->kids[0], (*root)->counts[0], + (*root)->elems[0], (*root)->kids[1], (*root)->counts[1])); + return 1; /* root moved */ + } +} + +/* + * Add an element e to a 2-3-4 tree t. 
Returns e on success, or if + * an existing element compares equal, returns that. + */ +static void *add234_internal(tree234 * t, void *e, int index) +{ + node234 *n; + int ki; + void *orig_e = e; + int c; + + LOG(("adding element \"%s\" to tree %p\n", e, t)); + if (t->root == NULL) + { + t->root = mknew(node234); + t->root->elems[1] = t->root->elems[2] = NULL; + t->root->kids[0] = t->root->kids[1] = NULL; + t->root->kids[2] = t->root->kids[3] = NULL; + t->root->counts[0] = t->root->counts[1] = 0; + t->root->counts[2] = t->root->counts[3] = 0; + t->root->parent = NULL; + t->root->elems[0] = e; + LOG((" created root %p\n", t->root)); + return orig_e; + } + + n = t->root; + while (n) + { + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + if (index >= 0) + { + if (!n->kids[0]) + { + /* + * Leaf node. We want to insert at kid position + * equal to the index: + * + * 0 A 1 B 2 C 3 + */ + ki = index; + } else + { + /* + * Internal node. We always descend through it (add + * always starts at the bottom, never in the + * middle). 
+ */ + if (index <= n->counts[0]) + { + ki = 0; + } else if (index -= n->counts[0] + 1, index <= n->counts[1]) + { + ki = 1; + } else if (index -= n->counts[1] + 1, index <= n->counts[2]) + { + ki = 2; + } else if (index -= n->counts[2] + 1, index <= n->counts[3]) + { + ki = 3; + } else + return NULL; /* error: index out of range */ + } + } else + { + if ((c = t->cmp(e, n->elems[0])) < 0) + ki = 0; + else if (c == 0) + return n->elems[0]; /* already exists */ + else if (n->elems[1] == NULL || (c = t->cmp(e, n->elems[1])) < 0) + ki = 1; + else if (c == 0) + return n->elems[1]; /* already exists */ + else if (n->elems[2] == NULL || (c = t->cmp(e, n->elems[2])) < 0) + ki = 2; + else if (c == 0) + return n->elems[2]; /* already exists */ + else + ki = 3; + } + LOG((" moving to child %d (%p)\n", ki, n->kids[ki])); + if (!n->kids[ki]) + break; + n = n->kids[ki]; + } + + add234_insert(NULL, e, NULL, &t->root, n, ki); + + return orig_e; +} + +void *add234(tree234 * t, void *e) +{ + if (!t->cmp) /* tree is unsorted */ + return NULL; + + return add234_internal(t, e, -1); +} + +void *addpos234(tree234 * t, void *e, int index) +{ + if (index < 0 || /* index out of range */ + t->cmp) /* tree is sorted */ + return NULL; /* return failure */ + + return add234_internal(t, e, index); /* this checks the upper bound */ +} + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. 
+ */ +void *index234(tree234 * t, int index) +{ + node234 *n; + + if (!t->root) + return NULL; /* tree is empty */ + + if (index < 0 || index >= countnode234(t->root)) + return NULL; /* out of range */ + + n = t->root; + + while (n) + { + if (index < n->counts[0]) + n = n->kids[0]; + else if (index -= n->counts[0] + 1, index < 0) + return n->elems[0]; + else if (index < n->counts[1]) + n = n->kids[1]; + else if (index -= n->counts[1] + 1, index < 0) + return n->elems[1]; + else if (index < n->counts[2]) + n = n->kids[2]; + else if (index -= n->counts[2] + 1, index < 0) + return n->elems[2]; + else + n = n->kids[3]; + } + + /* We shouldn't ever get here. I wonder how we did. */ + return NULL; +} + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + */ +void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation, + int *index) +{ + node234 *n; + void *ret; + int c; + int idx, ecount, kcount, cmpret; + + if (t->root == NULL) + return NULL; + + if (cmp == NULL) + cmp = t->cmp; + + n = t->root; + /* + * Attempt to find the element itself. + */ + idx = 0; + ecount = -1; + /* + * Prepare a fake `cmp' result if e is NULL. + */ + cmpret = 0; + if (e == NULL) + { + assert(relation == REL234_LT || relation == REL234_GT); + if (relation == REL234_LT) + cmpret = +1; /* e is a max: always greater */ + else if (relation == REL234_GT) + cmpret = -1; /* e is a min: always smaller */ + } + while (1) + { + for (kcount = 0; kcount < 4; kcount++) + { + if (kcount >= 3 || n->elems[kcount] == NULL || + (c = cmpret ? 
cmpret : cmp(e, n->elems[kcount])) < 0) + { + break; + } + if (n->kids[kcount]) + idx += n->counts[kcount]; + if (c == 0) + { + ecount = kcount; + break; + } + idx++; + } + if (ecount >= 0) + break; + if (n->kids[kcount]) + n = n->kids[kcount]; + else + break; + } + + if (ecount >= 0) + { + /* + * We have found the element we're looking for. It's + * n->elems[ecount], at tree index idx. If our search + * relation is EQ, LE or GE we can now go home. + */ + if (relation != REL234_LT && relation != REL234_GT) + { + if (index) + *index = idx; + return n->elems[ecount]; + } + + /* + * Otherwise, we'll do an indexed lookup for the previous + * or next element. (It would be perfectly possible to + * implement these search types in a non-counted tree by + * going back up from where we are, but far more fiddly.) + */ + if (relation == REL234_LT) + idx--; + else + idx++; + } else + { + /* + * We've found our way to the bottom of the tree and we + * know where we would insert this node if we wanted to: + * we'd put it in in place of the (empty) subtree + * n->kids[kcount], and it would have index idx + * + * But the actual element isn't there. So if our search + * relation is EQ, we're doomed. + */ + if (relation == REL234_EQ) + return NULL; + + /* + * Otherwise, we must do an index lookup for index idx-1 + * (if we're going left - LE or LT) or index idx (if we're + * going right - GE or GT). + */ + if (relation == REL234_LT || relation == REL234_LE) + { + idx--; + } + } + + /* + * We know the index of the element we want; just call index234 + * to do the rest. This will return NULL if the index is out of + * bounds, which is exactly what we want. 
+ */ + ret = index234(t, idx); + if (ret && index) + *index = idx; + return ret; +} + +void *find234(tree234 * t, void *e, cmpfn234 cmp) +{ + return findrelpos234(t, e, cmp, REL234_EQ, NULL); +} + +void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation) +{ + return findrelpos234(t, e, cmp, relation, NULL); +} + +void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index) +{ + return findrelpos234(t, e, cmp, REL234_EQ, index); +} + +/* + * Tree transformation used in delete and split: move a subtree + * right, from child ki of a node to the next child. Update k and + * index so that they still point to the same place in the + * transformed tree. Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d D e [more] a A b c C d D e + * + * . C . . B . + * / \ -> / \ + * [more] a A b B c d [more] a A b c C d + */ +static void trans234_subtree_right(node234 * n, int ki, int *k, int *index) +{ + node234 *src, *dest; + int i, srclen, adjust; + + src = n->kids[ki]; + dest = n->kids[ki + 1]; + + LOG((" trans234_subtree_right(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + /* + * Move over the rest of the 
destination node to make space. + */ + dest->kids[3] = dest->kids[2]; + dest->counts[3] = dest->counts[2]; + dest->elems[2] = dest->elems[1]; + dest->kids[2] = dest->kids[1]; + dest->counts[2] = dest->counts[1]; + dest->elems[1] = dest->elems[0]; + dest->kids[1] = dest->kids[0]; + dest->counts[1] = dest->counts[0]; + + /* which element to move over */ + i = (src->elems[2] ? 2 : src->elems[1] ? 1 : 0); + + dest->elems[0] = n->elems[ki]; + n->elems[ki] = src->elems[i]; + src->elems[i] = NULL; + + dest->kids[0] = src->kids[i + 1]; + dest->counts[0] = src->counts[i + 1]; + src->kids[i + 1] = NULL; + src->counts[i + 1] = 0; + + if (dest->kids[0]) + dest->kids[0]->parent = dest; + + adjust = dest->counts[0] + 1; + + n->counts[ki] -= adjust; + n->counts[ki + 1] += adjust; + + srclen = n->counts[ki]; + + if (k) + { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki && (*index) > srclen) + { + (*index) -= srclen + 1; + (*k)++; + } else if ((*k) == ki + 1) + { + (*index) += adjust; + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); +} + +/* + * Tree transformation used in delete and split: move a subtree + * left, from child ki of a node to the previous child. 
Update k + * and index so that they still point to the same place in the + * transformed tree. Assumes the destination child is not full, and + * that the source child does have a subtree to spare. Can cope if + * the destination child is undersized. + * + * . B . . C . + * / \ -> / \ + * a A b c C d D e [more] a A b B c d D e [more] + * + * . A . . B . + * / \ -> / \ + * a b B c C d [more] a A b c C d [more] + */ +static void trans234_subtree_left(node234 * n, int ki, int *k, int *index) +{ + node234 *src, *dest; + int i, adjust; + + src = n->kids[ki]; + dest = n->kids[ki - 1]; + + LOG((" trans234_subtree_left(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); + + /* where in dest to put it */ + i = (dest->elems[1] ? 2 : dest->elems[0] ? 1 : 0); + dest->elems[i] = n->elems[ki - 1]; + n->elems[ki - 1] = src->elems[0]; + + dest->kids[i + 1] = src->kids[0]; + dest->counts[i + 1] = src->counts[0]; + + if (dest->kids[i + 1]) + dest->kids[i + 1]->parent = dest; + + /* + * Move over the rest of the source node. 
+ */ + src->kids[0] = src->kids[1]; + src->counts[0] = src->counts[1]; + src->elems[0] = src->elems[1]; + src->kids[1] = src->kids[2]; + src->counts[1] = src->counts[2]; + src->elems[1] = src->elems[2]; + src->kids[2] = src->kids[3]; + src->counts[2] = src->counts[3]; + src->elems[2] = NULL; + src->kids[3] = NULL; + src->counts[3] = 0; + + adjust = dest->counts[i + 1] + 1; + + n->counts[ki] -= adjust; + n->counts[ki - 1] += adjust; + + if (k) + { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki) + { + (*index) -= adjust; + if ((*index) < 0) + { + (*index) += n->counts[ki - 1] + 1; + (*k)--; + } + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" dest %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + dest, + dest->kids[0], dest->counts[0], dest->elems[0], + dest->kids[1], dest->counts[1], dest->elems[1], + dest->kids[2], dest->counts[2], dest->elems[2], + dest->kids[3], dest->counts[3])); + LOG((" src %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + src, + src->kids[0], src->counts[0], src->elems[0], + src->kids[1], src->counts[1], src->elems[1], + src->kids[2], src->counts[2], src->elems[2], + src->kids[3], src->counts[3])); +} + +/* + * Tree transformation used in delete and split: merge child nodes + * ki and ki+1 of a node. Update k and index so that they still + * point to the same place in the transformed tree. Assumes both + * children _are_ sufficiently small. + * + * . B . . + * / \ -> | + * a A b c C d a A b B c C d + * + * This routine can also cope with either child being undersized: + * + * . A . . + * / \ -> | + * a b B c a A b B c + * + * . A . . 
+ * / \ -> | + * a b B c C d a A b B c C d + */ +static void trans234_subtree_merge(node234 * n, int ki, int *k, int *index) +{ + node234 *left, *right; + int i, leftlen, rightlen, lsize, rsize; + + left = n->kids[ki]; + leftlen = n->counts[ki]; + right = n->kids[ki + 1]; + rightlen = n->counts[ki + 1]; + + LOG((" trans234_subtree_merge(%p, %d):\n", n, ki)); + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" left %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + LOG((" right %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + right, + right->kids[0], right->counts[0], right->elems[0], + right->kids[1], right->counts[1], right->elems[1], + right->kids[2], right->counts[2], right->elems[2], + right->kids[3], right->counts[3])); + + assert(!left->elems[2] && !right->elems[2]); /* neither is large! */ + lsize = (left->elems[1] ? 2 : left->elems[0] ? 1 : 0); + rsize = (right->elems[1] ? 2 : right->elems[0] ? 1 : 0); + + left->elems[lsize] = n->elems[ki]; + + for (i = 0; i < rsize + 1; i++) + { + left->kids[lsize + 1 + i] = right->kids[i]; + left->counts[lsize + 1 + i] = right->counts[i]; + if (left->kids[lsize + 1 + i]) + left->kids[lsize + 1 + i]->parent = left; + if (i < rsize) + left->elems[lsize + 1 + i] = right->elems[i]; + } + + n->counts[ki] += rightlen + 1; + + sfree(right); + + /* + * Move the rest of n up by one. 
+ */ + for (i = ki + 1; i < 3; i++) + { + n->kids[i] = n->kids[i + 1]; + n->counts[i] = n->counts[i + 1]; + } + for (i = ki; i < 2; i++) + { + n->elems[i] = n->elems[i + 1]; + } + n->kids[3] = NULL; + n->counts[3] = 0; + n->elems[2] = NULL; + + if (k) + { + LOG((" before: k,index = %d,%d\n", (*k), (*index))); + if ((*k) == ki + 1) + { + (*k)--; + (*index) += leftlen + 1; + } else if ((*k) > ki + 1) + { + (*k)--; + } + LOG((" after: k,index = %d,%d\n", (*k), (*index))); + } + + LOG((" parent %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, + n->kids[0], n->counts[0], n->elems[0], + n->kids[1], n->counts[1], n->elems[1], + n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3])); + LOG((" merged %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + left, + left->kids[0], left->counts[0], left->elems[0], + left->kids[1], left->counts[1], left->elems[1], + left->kids[2], left->counts[2], left->elems[2], + left->kids[3], left->counts[3])); + +} + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. + */ +static void *delpos234_internal(tree234 * t, int index) +{ + node234 *n; + void *retval; + int ki, i; + + retval = NULL; + + n = t->root; /* by assumption this is non-NULL */ + LOG(("deleting item %d from tree %p\n", index, t)); + while (1) + { + node234 *sub; + + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index)); + if (index <= n->counts[0]) + { + ki = 0; + } else if (index -= n->counts[0] + 1, index <= n->counts[1]) + { + ki = 1; + } else if (index -= n->counts[1] + 1, index <= n->counts[2]) + { + ki = 2; + } else if (index -= n->counts[2] + 1, index <= n->counts[3]) + { + ki = 3; + } else + { + assert(0); /* can't happen */ + } + + if (!n->kids[0]) + break; /* n is a leaf node; we're here! 
*/ + + /* + * Check to see if we've found our target element. If so, + * we must choose a new target (we'll use the old target's + * successor, which will be in a leaf), move it into the + * place of the old one, continue down to the leaf and + * delete the old copy of the new target. + */ + if (index == n->counts[ki]) + { + node234 *m; + LOG((" found element in internal node, index %d\n", ki)); + assert(n->elems[ki]); /* must be a kid _before_ an element */ + ki++; + index = 0; + for (m = n->kids[ki]; m->kids[0]; m = m->kids[0]) + continue; + LOG((" replacing with element \"%s\" from leaf node %p\n", + m->elems[0], m)); + retval = n->elems[ki - 1]; + n->elems[ki - 1] = m->elems[0]; + } + + /* + * Recurse down to subtree ki. If it has only one element, + * we have to do some transformation to start with. + */ + LOG((" moving to subtree %d\n", ki)); + sub = n->kids[ki]; + if (!sub->elems[1]) + { + LOG((" subtree has only one element!\n")); + if (ki > 0 && n->kids[ki - 1]->elems[1]) + { + /* + * Child ki has only one element, but child + * ki-1 has two or more. So we need to move a + * subtree from ki-1 to ki. + */ + trans234_subtree_right(n, ki - 1, &ki, &index); + } else if (ki < 3 && n->kids[ki + 1] && n->kids[ki + 1]->elems[1]) + { + /* + * Child ki has only one element, but ki+1 has + * two or more. Move a subtree from ki+1 to ki. + */ + trans234_subtree_left(n, ki + 1, &ki, &index); + } else + { + /* + * ki is small with only small neighbours. Pick a + * neighbour and merge with it. + */ + trans234_subtree_merge(n, ki > 0 ? ki - 1 : ki, &ki, &index); + sub = n->kids[ki]; + + if (!n->elems[0]) + { + /* + * The root is empty and needs to be + * removed. + */ + LOG((" shifting root!\n")); + t->root = sub; + sub->parent = NULL; + sfree(n); + n = NULL; + } + } + } + + if (n) + n->counts[ki]--; + n = sub; + } + + /* + * Now n is a leaf node, and ki marks the element number we + * want to delete. 
We've already arranged for the leaf to be + * bigger than minimum size, so let's just go to it. + */ + assert(!n->kids[0]); + if (!retval) + retval = n->elems[ki]; + + for (i = ki; i < 2 && n->elems[i + 1]; i++) + n->elems[i] = n->elems[i + 1]; + n->elems[i] = NULL; + + /* + * It's just possible that we have reduced the leaf to zero + * size. This can only happen if it was the root - so destroy + * it and make the tree empty. + */ + if (!n->elems[0]) + { + LOG((" removed last element in tree, destroying empty root\n")); + assert(n == t->root); + sfree(n); + t->root = NULL; + } + + return retval; /* finished! */ +} + +void *delpos234(tree234 * t, int index) +{ + if (index < 0 || index >= countnode234(t->root)) + return NULL; + return delpos234_internal(t, index); +} + +void *del234(tree234 * t, void *e) +{ + int index; + if (!findrelpos234(t, e, NULL, REL234_EQ, &index)) + return NULL; /* it wasn't in there anyway */ + return delpos234_internal(t, index); /* it's there; delete it. */ +} + +/* + * Join two subtrees together with a separator element between + * them, given their relative height. + * + * (Height<0 means the left tree is shorter, >0 means the right + * tree is shorter, =0 means (duh) they're equal.) + * + * It is assumed that any checks needed on the ordering criterion + * have _already_ been done. + * + * The value returned in `height' is 0 or 1 depending on whether the + * resulting tree is the same height as the original larger one, or + * one higher. + */ +static node234 *join234_internal(node234 * left, void *sep, + node234 * right, int *height) +{ + node234 *root, *node; + int relht = *height; + int ki; + + LOG((" join: joining %p \"%s\" %p, relative height is %d\n", + left, sep, right, relht)); + if (relht == 0) + { + /* + * The trees are the same height. Create a new one-element + * root containing the separator and pointers to the two + * nodes. 
+ */ + node234 *newroot; + newroot = mknew(node234); + newroot->kids[0] = left; + newroot->counts[0] = countnode234(left); + newroot->elems[0] = sep; + newroot->kids[1] = right; + newroot->counts[1] = countnode234(right); + newroot->elems[1] = NULL; + newroot->kids[2] = NULL; + newroot->counts[2] = 0; + newroot->elems[2] = NULL; + newroot->kids[3] = NULL; + newroot->counts[3] = 0; + newroot->parent = NULL; + if (left) + left->parent = newroot; + if (right) + right->parent = newroot; + *height = 1; + LOG((" join: same height, brand new root\n")); + return newroot; + } + + /* + * This now works like the addition algorithm on the larger + * tree. We're replacing a single kid pointer with two kid + * pointers separated by an element; if that causes the node to + * overload, we split it in two, move a separator element up to + * the next node, and repeat. + */ + if (relht < 0) + { + /* + * Left tree is shorter. Search down the right tree to find + * the pointer we're inserting at. + */ + node = root = right; + while (++relht < 0) + { + node = node->kids[0]; + } + ki = 0; + right = node->kids[ki]; + } else + { + /* + * Right tree is shorter; search down the left to find the + * pointer we're inserting at. + */ + node = root = left; + while (--relht > 0) + { + if (node->elems[2]) + node = node->kids[3]; + else if (node->elems[1]) + node = node->kids[2]; + else + node = node->kids[1]; + } + if (node->elems[2]) + ki = 3; + else if (node->elems[1]) + ki = 2; + else + ki = 1; + left = node->kids[ki]; + } + + /* + * Now proceed as for addition. 
+ */ + *height = add234_insert(left, sep, right, &root, node, ki); + + return root; +} +static int height234(tree234 * t) +{ + int level = 0; + node234 *n = t->root; + while (n) + { + level++; + n = n->kids[0]; + } + return level; +} + +tree234 *join234(tree234 * t1, tree234 * t2) +{ + int size2 = countnode234(t2->root); + if (size2 > 0) + { + void *element; + int relht; + + if (t1->cmp) + { + element = index234(t2, 0); + element = findrelpos234(t1, element, NULL, REL234_GE, NULL); + if (element) + return NULL; + } + + element = delpos234(t2, 0); + relht = height234(t1) - height234(t2); + t1->root = join234_internal(t1->root, element, t2->root, &relht); + t2->root = NULL; + } + return t1; +} + +tree234 *join234r(tree234 * t1, tree234 * t2) +{ + int size1 = countnode234(t1->root); + if (size1 > 0) + { + void *element; + int relht; + + if (t2->cmp) + { + element = index234(t1, size1 - 1); + element = findrelpos234(t2, element, NULL, REL234_LE, NULL); + if (element) + return NULL; + } + + element = delpos234(t1, size1 - 1); + relht = height234(t1) - height234(t2); + t2->root = join234_internal(t1->root, element, t2->root, &relht); + t1->root = NULL; + } + return t2; +} + +/* + * Split out the first <index> elements in a tree and return a + * pointer to the root node. Leave the root node of the remainder + * in t. + */ +static node234 *split234_internal(tree234 * t, int index) +{ + node234 *halves[2], *n, *sib, *sub; + node234 *lparent, *rparent; + int ki, pki=0, i, half, lcount, rcount; + + n = t->root; + LOG(("splitting tree %p at point %d\n", t, index)); + + /* + * Easy special cases. After this we have also dealt completely + * with the empty-tree case and we can assume the root exists. + */ + if (index == 0) /* return nothing */ + return NULL; + if (index == countnode234(t->root)) + { /* return the whole tree */ + node234 *ret = t->root; + t->root = NULL; + return ret; + } + + /* + * Search down the tree to find the split point. 
+ */ + lparent = rparent = NULL; + while (n) + { + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d index=%d\n", n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], n->counts[3], index)); + lcount = index; + rcount = countnode234(n) - lcount; + if (index <= n->counts[0]) + { + ki = 0; + } else if (index -= n->counts[0] + 1, index <= n->counts[1]) + { + ki = 1; + } else if (index -= n->counts[1] + 1, index <= n->counts[2]) + { + ki = 2; + } else + { + index -= n->counts[2] + 1; + ki = 3; + } + + LOG((" splitting at subtree %d\n", ki)); + sub = n->kids[ki]; + + LOG((" splitting at child index %d\n", ki)); + + /* + * Split the node, put halves[0] on the right of the left + * one and halves[1] on the left of the right one, put the + * new node pointers in halves[0] and halves[1], and go up + * a level. + */ + sib = mknew(node234); + for (i = 0; i < 3; i++) + { + if (i + ki < 3 && n->elems[i + ki]) + { + sib->elems[i] = n->elems[i + ki]; + sib->kids[i + 1] = n->kids[i + ki + 1]; + if (sib->kids[i + 1]) + sib->kids[i + 1]->parent = sib; + sib->counts[i + 1] = n->counts[i + ki + 1]; + n->elems[i + ki] = NULL; + n->kids[i + ki + 1] = NULL; + n->counts[i + ki + 1] = 0; + } else + { + sib->elems[i] = NULL; + sib->kids[i + 1] = NULL; + sib->counts[i + 1] = 0; + } + } + if (lparent) + { + lparent->kids[pki] = n; + lparent->counts[pki] = lcount; + n->parent = lparent; + rparent->kids[0] = sib; + rparent->counts[0] = rcount; + sib->parent = rparent; + } else + { + halves[0] = n; + n->parent = NULL; + halves[1] = sib; + sib->parent = NULL; + } + lparent = n; + rparent = sib; + pki = ki; + LOG((" left node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + n, n->kids[0], n->counts[0], n->elems[0], n->kids[1], + n->counts[1], n->elems[1], n->kids[2], n->counts[2], n->elems[2], + n->kids[3], n->counts[3])); + LOG((" right node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", + sib, 
sib->kids[0], sib->counts[0], sib->elems[0], sib->kids[1], + sib->counts[1], sib->elems[1], sib->kids[2], sib->counts[2], + sib->elems[2], sib->kids[3], sib->counts[3])); + + n = sub; + } + + /* + * We've come off the bottom here, so we've successfully split + * the tree into two equally high subtrees. The only problem is + * that some of the nodes down the fault line will be smaller + * than the minimum permitted size. (Since this is a 2-3-4 + * tree, that means they'll be zero-element one-child nodes.) + */ + LOG((" fell off bottom, lroot is %p, rroot is %p\n", + halves[0], halves[1])); + lparent->counts[pki] = rparent->counts[0] = 0; + lparent->kids[pki] = rparent->kids[0] = NULL; + + /* + * So now we go back down the tree from each of the two roots, + * fixing up undersize nodes. + */ + for (half = 0; half < 2; half++) + { + /* + * Remove the root if it's undersize (it will contain only + * one child pointer, so just throw it away and replace it + * with its child). This might happen several times. + */ + while (halves[half] && !halves[half]->elems[0]) + { + LOG((" root %p is undersize, throwing away\n", halves[half])); + halves[half] = halves[half]->kids[0]; + sfree(halves[half]->parent); + halves[half]->parent = NULL; + LOG((" new root is %p\n", halves[half])); + } + + n = halves[half]; + while (n) + { + void (*toward) (node234 * n, int ki, int *k, int *index); + int ni, merge; + + /* + * Now we have a potentially undersize node on the + * right (if half==0) or left (if half==1). Sort it + * out, by merging with a neighbour or by transferring + * subtrees over. At this time we must also ensure that + * nodes are bigger than minimum, in case we need an + * element to merge two nodes below. 
+ */ + LOG((" node %p: %p/%d \"%s\" %p/%d \"%s\" %p/%d \"%s\" %p/%d\n", n, + n->kids[0], n->counts[0], n->elems[0], n->kids[1], n->counts[1], + n->elems[1], n->kids[2], n->counts[2], n->elems[2], n->kids[3], + n->counts[3])); + if (half == 1) + { + ki = 0; /* the kid we're interested in */ + ni = 1; /* the neighbour */ + merge = 0; /* for merge: leftmost of the two */ + toward = trans234_subtree_left; + } else + { + ki = (n->kids[3] ? 3 : n->kids[2] ? 2 : 1); + ni = ki - 1; + merge = ni; + toward = trans234_subtree_right; + } + + sub = n->kids[ki]; + if (sub && !sub->elems[1]) + { + /* + * This node is undersized or minimum-size. If we + * can merge it with its neighbour, we do so; + * otherwise we must be able to transfer subtrees + * over to it until it is greater than minimum + * size. + */ + int undersized = (!sub->elems[0]); + LOG((" child %d is %ssize\n", ki, + undersized ? "under" : "minimum-")); + LOG((" neighbour is %s\n", + n->kids[ni]->elems[2] ? "large" : + n->kids[ni]->elems[1] ? "medium" : "small")); + if (!n->kids[ni]->elems[1] || + (undersized && !n->kids[ni]->elems[2])) + { + /* + * Neighbour is small, or possibly neighbour is + * medium and we are undersize. + */ + trans234_subtree_merge(n, merge, NULL, NULL); + sub = n->kids[merge]; + if (!n->elems[0]) + { + /* + * n is empty, and hence must have been the + * root and needs to be removed. + */ + assert(!n->parent); + LOG((" shifting root!\n")); + halves[half] = sub; + halves[half]->parent = NULL; + sfree(n); + } + } else + { + /* Neighbour is big enough to move trees over. 
*/ + toward(n, ni, NULL, NULL); + if (undersized) + toward(n, ni, NULL, NULL); + } + } + n = sub; + } + } + + t->root = halves[1]; + return halves[0]; +} + +tree234 *splitpos234(tree234 * t, int index, int before) +{ + tree234 *ret; + node234 *n; + int count; + + count = countnode234(t->root); + if (index < 0 || index > count) + return NULL; /* error */ + ret = newtree234(t->cmp); + n = split234_internal(t, index); + if (before) + { + /* We want to return the ones before the index. */ + ret->root = n; + } else + { + /* + * We want to keep the ones before the index and return the + * ones after. + */ + ret->root = t->root; + t->root = n; + } + return ret; +} + +tree234 *split234(tree234 * t, void *e, cmpfn234 cmp, int rel) +{ + int before; + int index; + + assert(rel != REL234_EQ); + + if (rel == REL234_GT || rel == REL234_GE) + { + before = 1; + rel = (rel == REL234_GT ? REL234_LE : REL234_LT); + } else + { + before = 0; + } + if (!findrelpos234(t, e, cmp, rel, &index)) + index = 0; + + return splitpos234(t, index + 1, before); +} + +static node234 *copynode234(node234 * n, copyfn234 copyfn, + void *copyfnstate) +{ + int i; + node234 *n2 = mknew(node234); + + for (i = 0; i < 3; i++) + { + if (n->elems[i] && copyfn) + n2->elems[i] = copyfn(copyfnstate, n->elems[i]); + else + n2->elems[i] = n->elems[i]; + } + + for (i = 0; i < 4; i++) + { + if (n->kids[i]) + { + n2->kids[i] = copynode234(n->kids[i], copyfn, copyfnstate); + n2->kids[i]->parent = n2; + } else + { + n2->kids[i] = NULL; + } + n2->counts[i] = n->counts[i]; + } + + return n2; +} + +tree234 *copytree234(tree234 * t, copyfn234 copyfn, void *copyfnstate) +{ + tree234 *t2; + + t2 = newtree234(t->cmp); + t2->root = copynode234(t->root, copyfn, copyfnstate); + t2->root->parent = NULL; + + return t2; +} + +#ifdef TEST + +/* + * Test code for the 2-3-4 tree. This code maintains an alternative + * representation of the data in the tree, in an array (using the + * obvious and slow insert and delete functions). 
After each tree + * operation, the verify() function is called, which ensures all + * the tree properties are preserved: + * - node->child->parent always equals node + * - tree->root->parent always equals NULL + * - number of kids == 0 or number of elements + 1; + * - tree has the same depth everywhere + * - every node has at least one element + * - subtree element counts are accurate + * - any NULL kid pointer is accompanied by a zero count + * - in a sorted tree: ordering property between elements of a + * node and elements of its children is preserved + * and also ensures the list represented by the tree is the same + * list it should be. (This last check also doubly verifies the + * ordering properties, because the `same list it should be' is by + * definition correctly ordered. It also ensures all nodes are + * distinct, because the enum functions would get caught in a loop + * if not.) + */ + +#include <stdarg.h> + +#define srealloc realloc + +/* + * Error reporting function. + */ +void error(char *fmt, ...) +{ + va_list ap; + printf("ERROR: "); + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + printf("\n"); +} + +/* The array representation of the data. */ +void **array; +int arraylen, arraysize; +cmpfn234 cmp; + +/* The tree representation of the same data. */ +tree234 *tree; + +/* + * Routines to provide a diagnostic printout of a tree. 
Currently + * relies on every element in the tree being a one-character string + * :-) + */ +typedef struct { + char **levels; +} dispctx; + +int dispnode(node234 * n, int level, dispctx * ctx) +{ + if (level == 0) + { + int xpos = strlen(ctx->levels[0]); + int len; + + if (n->elems[2]) + len = sprintf(ctx->levels[0] + xpos, " %s%s%s", + n->elems[0], n->elems[1], n->elems[2]); + else if (n->elems[1]) + len = sprintf(ctx->levels[0] + xpos, " %s%s", + n->elems[0], n->elems[1]); + else + len = sprintf(ctx->levels[0] + xpos, " %s", n->elems[0]); + return xpos + 1 + (len - 1) / 2; + } else + { + int xpos[4], nkids; + int nodelen, mypos, myleft, x, i; + + xpos[0] = dispnode(n->kids[0], level - 3, ctx); + xpos[1] = dispnode(n->kids[1], level - 3, ctx); + nkids = 2; + if (n->kids[2]) + { + xpos[2] = dispnode(n->kids[2], level - 3, ctx); + nkids = 3; + } + if (n->kids[3]) + { + xpos[3] = dispnode(n->kids[3], level - 3, ctx); + nkids = 4; + } + + if (nkids == 4) + mypos = (xpos[1] + xpos[2]) / 2; + else if (nkids == 3) + mypos = xpos[1]; + else + mypos = (xpos[0] + xpos[1]) / 2; + nodelen = nkids * 2 - 1; + myleft = mypos - ((nodelen - 1) / 2); + assert(myleft >= xpos[0]); + assert(myleft + nodelen - 1 <= xpos[nkids - 1]); + + x = strlen(ctx->levels[level]); + while (x <= xpos[0] && x < myleft) + ctx->levels[level][x++] = ' '; + while (x < myleft) + ctx->levels[level][x++] = '_'; + if (nkids == 4) + x += sprintf(ctx->levels[level] + x, ".%s.%s.%s.", + n->elems[0], n->elems[1], n->elems[2]); + else if (nkids == 3) + x += sprintf(ctx->levels[level] + x, ".%s.%s.", + n->elems[0], n->elems[1]); + else + x += sprintf(ctx->levels[level] + x, ".%s.", n->elems[0]); + while (x < xpos[nkids - 1]) + ctx->levels[level][x++] = '_'; + ctx->levels[level][x] = '\0'; + + x = strlen(ctx->levels[level - 1]); + for (i = 0; i < nkids; i++) + { + int rpos, pos; + rpos = xpos[i]; + if (i > 0 && i < nkids - 1) + pos = myleft + 2 * i; + else + pos = rpos; + if (rpos < pos) + rpos++; + while (x < pos 
&& x < rpos) + ctx->levels[level - 1][x++] = ' '; + if (x == pos) + ctx->levels[level - 1][x++] = '|'; + while (x < pos || x < rpos) + ctx->levels[level - 1][x++] = '_'; + if (x == pos) + ctx->levels[level - 1][x++] = '|'; + } + ctx->levels[level - 1][x] = '\0'; + + x = strlen(ctx->levels[level - 2]); + for (i = 0; i < nkids; i++) + { + int rpos = xpos[i]; + + while (x < rpos) + ctx->levels[level - 2][x++] = ' '; + ctx->levels[level - 2][x++] = '|'; + } + ctx->levels[level - 2][x] = '\0'; + + return mypos; + } +} + +void disptree(tree234 * t) +{ + dispctx ctx; + char *leveldata; + int width = count234(t); + int ht = height234(t) * 3 - 2; + int i; + + if (!t->root) + { + printf("[empty tree]\n"); + } + + leveldata = smalloc(ht * (width + 2)); + ctx.levels = smalloc(ht * sizeof(char *)); + for (i = 0; i < ht; i++) + { + ctx.levels[i] = leveldata + i * (width + 2); + ctx.levels[i][0] = '\0'; + } + + (void) dispnode(t->root, ht - 1, &ctx); + + for (i = ht; i--;) + printf("%s\n", ctx.levels[i]); + + sfree(ctx.levels); + sfree(leveldata); +} + +typedef struct { + int treedepth; + int elemcount; +} chkctx; + +int +chknode(chkctx * ctx, int level, node234 * node, + void *lowbound, void *highbound) +{ + int nkids, nelems; + int i; + int count; + + /* Count the non-NULL kids. */ + for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++); + /* Ensure no kids beyond the first NULL are non-NULL. */ + for (i = nkids; i < 4; i++) + if (node->kids[i]) + { + error("node %p: nkids=%d but kids[%d] non-NULL", node, nkids, i); + } else if (node->counts[i]) + { + error("node %p: kids[%d] NULL but count[%d]=%d nonzero", + node, i, i, node->counts[i]); + } + + /* Count the non-NULL elements. */ + for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++); + /* Ensure no elements beyond the first NULL are non-NULL. 
*/ + for (i = nelems; i < 3; i++) + if (node->elems[i]) + { + error("node %p: nelems=%d but elems[%d] non-NULL", node, nelems, i); + } + + if (nkids == 0) + { + /* + * If nkids==0, this is a leaf node; verify that the tree + * depth is the same everywhere. + */ + if (ctx->treedepth < 0) + ctx->treedepth = level; /* we didn't know the depth yet */ + else if (ctx->treedepth != level) + error("node %p: leaf at depth %d, previously seen depth %d", + node, level, ctx->treedepth); + } else + { + /* + * If nkids != 0, then it should be nelems+1, unless nelems + * is 0 in which case nkids should also be 0 (and so we + * shouldn't be in this condition at all). + */ + int shouldkids = (nelems ? nelems + 1 : 0); + if (nkids != shouldkids) + { + error("node %p: %d elems should mean %d kids but has %d", + node, nelems, shouldkids, nkids); + } + } + + /* + * nelems should be at least 1. + */ + if (nelems == 0) + { + error("node %p: no elems", node, nkids); + } + + /* + * Add nelems to the running element count of the whole tree. + */ + ctx->elemcount += nelems; + + /* + * Check ordering property: all elements should be strictly > + * lowbound, strictly < highbound, and strictly < each other in + * sequence. (lowbound and highbound are NULL at edges of tree + * - both NULL at root node - and NULL is considered to be < + * everything and > everything. IYSWIM.) + */ + if (cmp) + { + for (i = -1; i < nelems; i++) + { + void *lower = (i == -1 ? lowbound : node->elems[i]); + void *higher = (i + 1 == nelems ? highbound : node->elems[i + 1]); + if (lower && higher && cmp(lower, higher) >= 0) + { + error("node %p: kid comparison [%d=%s,%d=%s] failed", + node, i, lower, i + 1, higher); + } + } + } + + /* + * Check parent pointers: all non-NULL kids should have a + * parent pointer coming back to this node. 
+ */ + for (i = 0; i < nkids; i++) + if (node->kids[i]->parent != node) + { + error("node %p kid %d: parent ptr is %p not %p", + node, i, node->kids[i]->parent, node); + } + + + /* + * Now (finally!) recurse into subtrees. + */ + count = nelems; + + for (i = 0; i < nkids; i++) + { + void *lower = (i == 0 ? lowbound : node->elems[i - 1]); + void *higher = (i >= nelems ? highbound : node->elems[i]); + int subcount = chknode(ctx, level + 1, node->kids[i], lower, higher); + if (node->counts[i] != subcount) + { + error("node %p kid %d: count says %d, subtree really has %d", + node, i, node->counts[i], subcount); + } + count += subcount; + } + + return count; +} + +void verifytree(tree234 * tree, void **array, int arraylen) +{ + chkctx ctx; + int i; + void *p; + + ctx.treedepth = -1; /* depth unknown yet */ + ctx.elemcount = 0; /* no elements seen yet */ + /* + * Verify validity of tree properties. + */ + if (tree->root) + { + if (tree->root->parent != NULL) + error("root->parent is %p should be null", tree->root->parent); + chknode(&ctx, 0, tree->root, NULL, NULL); + } + printf("tree depth: %d\n", ctx.treedepth); + /* + * Enumerate the tree and ensure it matches up to the array. 
+ */ + for (i = 0; NULL != (p = index234(tree, i)); i++) + { + if (i >= arraylen) + error("tree contains more than %d elements", arraylen); + if (array[i] != p) + error("enum at position %d: array says %s, tree says %s", + i, array[i], p); + } + if (ctx.elemcount != i) + { + error("tree really contains %d elements, enum gave %d", + ctx.elemcount, i); + } + if (i < arraylen) + { + error("enum gave only %d elements, array has %d", i, arraylen); + } + i = count234(tree); + if (ctx.elemcount != i) + { + error("tree really contains %d elements, count234 gave %d", + ctx.elemcount, i); + } +} +void verify(void) +{ + verifytree(tree, array, arraylen); +} + +void internal_addtest(void *elem, int index, void *realret) +{ + int i, j; + void *retval; + + if (arraysize < arraylen + 1) + { + arraysize = arraylen + 1 + 256; + array = (array == NULL ? smalloc(arraysize * sizeof(*array)) : + srealloc(array, arraysize * sizeof(*array))); + } + + i = index; + /* now i points to the first element >= elem */ + retval = elem; /* expect elem returned (success) */ + for (j = arraylen; j > i; j--) + array[j] = array[j - 1]; + array[i] = elem; /* add elem to array */ + arraylen++; + + if (realret != retval) + { + error("add: retval was %p expected %p", realret, retval); + } + + verify(); +} + +void addtest(void *elem) +{ + int i; + void *realret; + + realret = add234(tree, elem); + + i = 0; + while (i < arraylen && cmp(elem, array[i]) > 0) + i++; + if (i < arraylen && !cmp(elem, array[i])) + { + void *retval = array[i]; /* expect that returned not elem */ + if (realret != retval) + { + error("add: retval was %p expected %p", realret, retval); + } + } else + internal_addtest(elem, i, realret); +} + +void addpostest(void *elem, int i) +{ + void *realret; + + realret = addpos234(tree, elem, i); + + internal_addtest(elem, i, realret); +} + +void delpostest(int i) +{ + int index = i; + void *elem = array[i], *ret; + + /* i points to the right element */ + while (i < arraylen - 1) + { + array[i] 
= array[i + 1]; + i++; + } + arraylen--; /* delete elem from array */ + + if (tree->cmp) + ret = del234(tree, elem); + else + ret = delpos234(tree, index); + + if (ret != elem) + { + error("del returned %p, expected %p", ret, elem); + } + + verify(); +} + +void deltest(void *elem) +{ + int i; + + i = 0; + while (i < arraylen && cmp(elem, array[i]) > 0) + i++; + if (i >= arraylen || cmp(elem, array[i]) != 0) + return; /* don't do it! */ + delpostest(i); +} + +/* A sample data set and test utility. Designed for pseudo-randomness, + * and yet repeatability. */ + +/* + * This random number generator uses the `portable implementation' + * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits; + * change it if not. + */ +int randomnumber(unsigned *seed) +{ + *seed *= 1103515245; + *seed += 12345; + return ((*seed) / 65536) % 32768; +} + +int mycmp(void *av, void *bv) +{ + char const *a = (char const *) av; + char const *b = (char const *) bv; + return strcmp(a, b); +} + +#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) + +char *strings[] = { + "0", "2", "3", "I", "K", "d", "H", "J", "Q", "N", "n", "q", "j", "i", + "7", "G", "F", "D", "b", "x", "g", "B", "e", "v", "V", "T", "f", "E", + "S", "8", "A", "k", "X", "p", "C", "R", "a", "o", "r", "O", "Z", "u", + "6", "1", "w", "L", "P", "M", "c", "U", "h", "9", "t", "5", "W", "Y", + "m", "s", "l", "4", +#if 0 + "a", "ab", "absque", "coram", "de", + "palam", "clam", "cum", "ex", "e", + "sine", "tenus", "pro", "prae", + "banana", "carrot", "cabbage", "broccoli", "onion", "zebra", + "penguin", "blancmange", "pangolin", "whale", "hedgehog", + "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux", + "murfl", "spoo", "breen", "flarn", "octothorpe", + "snail", "tiger", "elephant", "octopus", "warthog", "armadillo", + "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin", + "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper", + "wand", "ring", "amulet" +#endif +}; + +#define NSTR lenof(strings) + 
+void findtest(void) +{ + static const int rels[] = { + REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT + }; + static const char *const relnames[] = { + "EQ", "GE", "LE", "LT", "GT" + }; + int i, j, rel, index; + char *p, *ret, *realret, *realret2; + int lo, hi, mid, c; + + for (i = 0; i < (int) NSTR; i++) + { + p = strings[i]; + for (j = 0; j < (int) (sizeof(rels) / sizeof(*rels)); j++) + { + rel = rels[j]; + + lo = 0; + hi = arraylen - 1; + while (lo <= hi) + { + mid = (lo + hi) / 2; + c = strcmp(p, array[mid]); + if (c < 0) + hi = mid - 1; + else if (c > 0) + lo = mid + 1; + else + break; + } + + if (c == 0) + { + if (rel == REL234_LT) + ret = (mid > 0 ? array[--mid] : NULL); + else if (rel == REL234_GT) + ret = (mid < arraylen - 1 ? array[++mid] : NULL); + else + ret = array[mid]; + } else + { + assert(lo == hi + 1); + if (rel == REL234_LT || rel == REL234_LE) + { + mid = hi; + ret = (hi >= 0 ? array[hi] : NULL); + } else if (rel == REL234_GT || rel == REL234_GE) + { + mid = lo; + ret = (lo < arraylen ? 
array[lo] : NULL); + } else + ret = NULL; + } + + realret = findrelpos234(tree, p, NULL, rel, &index); + if (realret != ret) + { + error("find(\"%s\",%s) gave %s should be %s", + p, relnames[j], realret, ret); + } + if (realret && index != mid) + { + error("find(\"%s\",%s) gave %d should be %d", + p, relnames[j], index, mid); + } + if (realret && rel == REL234_EQ) + { + realret2 = index234(tree, index); + if (realret2 != realret) + { + error("find(\"%s\",%s) gave %s(%d) but %d -> %s", + p, relnames[j], realret, index, index, realret2); + } + } +#if 0 + printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j], + realret, index); +#endif + } + } + + realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index); + if (arraylen && (realret != array[0] || index != 0)) + { + error("find(NULL,GT) gave %s(%d) should be %s(0)", + realret, index, array[0]); + } else if (!arraylen && (realret != NULL)) + { + error("find(NULL,GT) gave %s(%d) should be NULL", realret, index); + } + + realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index); + if (arraylen + && (realret != array[arraylen - 1] || index != arraylen - 1)) + { + error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index, + array[arraylen - 1]); + } else if (!arraylen && (realret != NULL)) + { + error("find(NULL,LT) gave %s(%d) should be NULL", realret, index); + } +} + +void splittest(tree234 * tree, void **array, int arraylen) +{ + int i; + tree234 *tree3, *tree4; + for (i = 0; i <= arraylen; i++) + { + tree3 = copytree234(tree, NULL, NULL); + tree4 = splitpos234(tree3, i, 0); + verifytree(tree3, array, i); + verifytree(tree4, array + i, arraylen - i); + join234(tree3, tree4); + freetree234(tree4); /* left empty by join */ + verifytree(tree3, array, arraylen); + freetree234(tree3); + } +} + +int main(void) +{ + int in[NSTR]; + int i, j, k; + int tworoot, tmplen; + unsigned seed = 0; + tree234 *tree2, *tree3, *tree4; + int c; + + setvbuf(stdout, NULL, _IOLBF, 0); + + for (i = 0; i < (int) NSTR; i++) + in[i] 
= 0; + array = NULL; + arraylen = arraysize = 0; + tree = newtree234(mycmp); + cmp = mycmp; + + verify(); + for (i = 0; i < 10000; i++) + { + j = randomnumber(&seed); + j %= NSTR; + printf("trial: %d\n", i); + if (in[j]) + { + printf("deleting %s (%d)\n", strings[j], j); + deltest(strings[j]); + in[j] = 0; + } else + { + printf("adding %s (%d)\n", strings[j], j); + addtest(strings[j]); + in[j] = 1; + } + disptree(tree); + findtest(); + } + + while (arraylen > 0) + { + j = randomnumber(&seed); + j %= arraylen; + deltest(array[j]); + } + + freetree234(tree); + + /* + * Now try an unsorted tree. We don't really need to test + * delpos234 because we know del234 is based on it, so it's + * already been tested in the above sorted-tree code; but for + * completeness we'll use it to tear down our unsorted tree + * once we've built it. + */ + tree = newtree234(NULL); + cmp = NULL; + verify(); + for (i = 0; i < 1000; i++) + { + printf("trial: %d\n", i); + j = randomnumber(&seed); + j %= NSTR; + k = randomnumber(&seed); + k %= count234(tree) + 1; + printf("adding string %s at index %d\n", strings[j], k); + addpostest(strings[j], k); + } + + /* + * While we have this tree in its full form, we'll take a copy + * of it to use in split and join testing. + */ + tree2 = copytree234(tree, NULL, NULL); + verifytree(tree2, array, arraylen); /* check the copy is accurate */ + /* + * Split tests. Split the tree at every possible point and + * check the resulting subtrees. + */ + tworoot = (!tree2->root->elems[1]); /* see if it has a 2-root */ + splittest(tree2, array, arraylen); + /* + * Now do the split test again, but on a tree that has a 2-root + * (if the previous one didn't) or doesn't (if the previous one + * did). + */ + tmplen = arraylen; + while ((!tree2->root->elems[1]) == tworoot) + { + delpos234(tree2, --tmplen); + } + printf("now trying splits on second tree\n"); + splittest(tree2, array, tmplen); + freetree234(tree2); + + /* + * Back to the main testing of uncounted trees. 
+ */ + while (count234(tree) > 0) + { + printf("cleanup: tree size %d\n", count234(tree)); + j = randomnumber(&seed); + j %= count234(tree); + printf("deleting string %s from index %d\n", (char *) array[j], j); + delpostest(j); + } + freetree234(tree); + + /* + * Finally, do some testing on split/join on _sorted_ trees. At + * the same time, we'll be testing split on very small trees. + */ + tree = newtree234(mycmp); + cmp = mycmp; + arraylen = 0; + for (i = 0; i < 16; i++) + { + addtest(strings[i]); + tree2 = copytree234(tree, NULL, NULL); + splittest(tree2, array, arraylen); + freetree234(tree2); + } + freetree234(tree); + + /* + * Test silly cases of join: join(emptytree, emptytree), and + * also ensure join correctly spots when sorted trees fail the + * ordering constraint. + */ + tree = newtree234(mycmp); + tree2 = newtree234(mycmp); + tree3 = newtree234(mycmp); + tree4 = newtree234(mycmp); + assert(mycmp(strings[0], strings[1]) < 0); /* just in case :-) */ + add234(tree2, strings[1]); + add234(tree4, strings[0]); + array[0] = strings[0]; + array[1] = strings[1]; + verifytree(tree, array, 0); + verifytree(tree2, array + 1, 1); + verifytree(tree3, array, 0); + verifytree(tree4, array, 1); + + /* + * So: + * - join(tree,tree3) should leave both tree and tree3 unchanged. + * - joinr(tree,tree2) should leave both tree and tree2 unchanged. + * - join(tree4,tree3) should leave both tree3 and tree4 unchanged. + * - join(tree, tree2) should move the element from tree2 to tree. + * - joinr(tree4, tree3) should move the element from tree4 to tree3. + * - join(tree,tree3) should return NULL and leave both unchanged. + * - join(tree3,tree) should work and create a bigger tree in tree3. 
+ */ + assert(tree == join234(tree, tree3)); + verifytree(tree, array, 0); + verifytree(tree3, array, 0); + assert(tree2 == join234r(tree, tree2)); + verifytree(tree, array, 0); + verifytree(tree2, array + 1, 1); + assert(tree4 == join234(tree4, tree3)); + verifytree(tree3, array, 0); + verifytree(tree4, array, 1); + assert(tree == join234(tree, tree2)); + verifytree(tree, array + 1, 1); + verifytree(tree2, array, 0); + assert(tree3 == join234r(tree4, tree3)); + verifytree(tree3, array, 1); + verifytree(tree4, array, 0); + assert(NULL == join234(tree, tree3)); + verifytree(tree, array + 1, 1); + verifytree(tree3, array, 1); + assert(tree3 == join234(tree3, tree)); + verifytree(tree3, array, 2); + verifytree(tree, array, 0); + + return 0; +} + +#endif + +#if 0 /* sorted list of strings might be useful */ +{ +"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", + "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", + "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", + "o", "p", "q", "r", "s", "t", "u", "v", "w", "x",} +#endif diff --git a/Docs/src/bin/halibut/tree234.h b/Docs/src/bin/halibut/tree234.h index ff80ea6..9a636da 100755 --- a/Docs/src/bin/halibut/tree234.h +++ b/Docs/src/bin/halibut/tree234.h @@ -1,202 +1,202 @@ -/*
- * tree234.h: header defining functions in tree234.c.
- *
- * This file is copyright 1999-2001 Simon Tatham.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
- * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef TREE234_H
-#define TREE234_H
-
-/*
- * This typedef is opaque outside tree234.c itself.
- */
-typedef struct tree234_Tag tree234;
-
-typedef int (*cmpfn234) (void *, void *);
-
-typedef void *(*copyfn234) (void *state, void *element);
-
-/*
- * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and
- * lookups by key will fail: you can only look things up by numeric
- * index, and you have to use addpos234() and delpos234().
- */
-tree234 *newtree234(cmpfn234 cmp);
-
-/*
- * Free a 2-3-4 tree (not including freeing the elements).
- */
-void freetree234(tree234 * t);
-
-/*
- * Add an element e to a sorted 2-3-4 tree t. Returns e on success,
- * or if an existing element compares equal, returns that.
- */
-void *add234(tree234 * t, void *e);
-
-/*
- * Add an element e to an unsorted 2-3-4 tree t. Returns e on
- * success, NULL on failure. (Failure should only occur if the
- * index is out of range or the tree is sorted.)
- *
- * Index range can be from 0 to the tree's current element count,
- * inclusive.
- */
-void *addpos234(tree234 * t, void *e, int index);
-
-/*
- * Look up the element at a given numeric index in a 2-3-4 tree.
- * Returns NULL if the index is out of range.
- *
- * One obvious use for this function is in iterating over the whole
- * of a tree (sorted or unsorted):
- *
- * for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p);
- *
- * or
- *
- * int maxcount = count234(tree);
- * for (i = 0; i < maxcount; i++) {
- * p = index234(tree, i);
- * assert(p != NULL);
- * consume(p);
- * }
- */
-void *index234(tree234 * t, int index);
-
-/*
- * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
- * found. e is always passed as the first argument to cmp, so cmp
- * can be an asymmetric function if desired. cmp can also be passed
- * as NULL, in which case the compare function from the tree proper
- * will be used.
- *
- * Three of these functions are special cases of findrelpos234. The
- * non-`pos' variants lack the `index' parameter: if the parameter
- * is present and non-NULL, it must point to an integer variable
- * which will be filled with the numeric index of the returned
- * element.
- *
- * The non-`rel' variants lack the `relation' parameter. This
- * parameter allows you to specify what relation the element you
- * provide has to the element you're looking for. This parameter
- * can be:
- *
- * REL234_EQ - find only an element that compares equal to e
- * REL234_LT - find the greatest element that compares < e
- * REL234_LE - find the greatest element that compares <= e
- * REL234_GT - find the smallest element that compares > e
- * REL234_GE - find the smallest element that compares >= e
- *
- * Non-`rel' variants assume REL234_EQ.
- *
- * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be
- * NULL. In this case, REL234_GT will return the smallest element
- * in the tree, and REL234_LT will return the greatest. This gives
- * an alternative means of iterating over a sorted tree, instead of
- * using index234:
- *
- * // to loop forwards
- * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;)
- * consume(p);
- *
- * // to loop backwards
- * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;)
- * consume(p);
- */
-enum {
- REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE
-};
-void *find234(tree234 * t, void *e, cmpfn234 cmp);
-void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation);
-void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index);
-void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation,
- int *index);
-
-/*
- * Delete an element e in a 2-3-4 tree. Does not free the element,
- * merely removes all links to it from the tree nodes.
- *
- * delpos234 deletes the element at a particular tree index: it
- * works on both sorted and unsorted trees.
- *
- * del234 deletes the element passed to it, so it only works on
- * sorted trees. (It's equivalent to using findpos234 to determine
- * the index of an element, and then passing that index to
- * delpos234.)
- *
- * Both functions return a pointer to the element they delete, for
- * the user to free or pass on elsewhere or whatever. If the index
- * is out of range (delpos234) or the element is already not in the
- * tree (del234) then they return NULL.
- */
-void *del234(tree234 * t, void *e);
-void *delpos234(tree234 * t, int index);
-
-/*
- * Return the total element count of a tree234.
- */
-int count234(tree234 * t);
-
-/*
- * Split a tree234 into two valid tree234s.
- *
- * splitpos234 splits at a given index. If `before' is TRUE, the
- * items at and after that index are left in t and the ones before
- * are returned; if `before' is FALSE, the items before that index
- * are left in t and the rest are returned.
- *
- * split234 splits at a given key. You can pass any of the
- * relations used with findrel234, except for REL234_EQ. The items
- * in the tree that satisfy the relation are returned; the
- * remainder are left.
- */
-tree234 *splitpos234(tree234 * t, int index, int before);
-tree234 *split234(tree234 * t, void *e, cmpfn234 cmp, int rel);
-
-/*
- * Join two tree234s together into a single one.
- *
- * All the elements in t1 are placed to the left of all the
- * elements in t2. If the trees are sorted, there will be a test to
- * ensure that this satisfies the ordering criterion, and NULL will
- * be returned otherwise. If the trees are unsorted, there is no
- * restriction on the use of join234.
- *
- * The tree returned is t1 (join234) or t2 (join234r), if the
- * operation is successful.
- */
-tree234 *join234(tree234 * t1, tree234 * t2);
-tree234 *join234r(tree234 * t1, tree234 * t2);
-
-/*
- * Make a complete copy of a tree234. Element pointers will be
- * reused unless copyfn is non-NULL, in which case it will be used
- * to copy each element. (copyfn takes two `void *' parameters; the
- * first is private state and the second is the element. A simple
- * copy routine probably won't need private state.)
- */
-tree234 *copytree234(tree234 * t, copyfn234 copyfn, void *copyfnstate);
-
-#endif /* TREE234_H */
+/* + * tree234.h: header defining functions in tree234.c. + * + * This file is copyright 1999-2001 Simon Tatham. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef TREE234_H +#define TREE234_H + +/* + * This typedef is opaque outside tree234.c itself. + */ +typedef struct tree234_Tag tree234; + +typedef int (*cmpfn234) (void *, void *); + +typedef void *(*copyfn234) (void *state, void *element); + +/* + * Create a 2-3-4 tree. If `cmp' is NULL, the tree is unsorted, and + * lookups by key will fail: you can only look things up by numeric + * index, and you have to use addpos234() and delpos234(). + */ +tree234 *newtree234(cmpfn234 cmp); + +/* + * Free a 2-3-4 tree (not including freeing the elements). + */ +void freetree234(tree234 * t); + +/* + * Add an element e to a sorted 2-3-4 tree t. Returns e on success, + * or if an existing element compares equal, returns that. 
+ */ +void *add234(tree234 * t, void *e); + +/* + * Add an element e to an unsorted 2-3-4 tree t. Returns e on + * success, NULL on failure. (Failure should only occur if the + * index is out of range or the tree is sorted.) + * + * Index range can be from 0 to the tree's current element count, + * inclusive. + */ +void *addpos234(tree234 * t, void *e, int index); + +/* + * Look up the element at a given numeric index in a 2-3-4 tree. + * Returns NULL if the index is out of range. + * + * One obvious use for this function is in iterating over the whole + * of a tree (sorted or unsorted): + * + * for (i = 0; (p = index234(tree, i)) != NULL; i++) consume(p); + * + * or + * + * int maxcount = count234(tree); + * for (i = 0; i < maxcount; i++) { + * p = index234(tree, i); + * assert(p != NULL); + * consume(p); + * } + */ +void *index234(tree234 * t, int index); + +/* + * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not + * found. e is always passed as the first argument to cmp, so cmp + * can be an asymmetric function if desired. cmp can also be passed + * as NULL, in which case the compare function from the tree proper + * will be used. + * + * Three of these functions are special cases of findrelpos234. The + * non-`pos' variants lack the `index' parameter: if the parameter + * is present and non-NULL, it must point to an integer variable + * which will be filled with the numeric index of the returned + * element. + * + * The non-`rel' variants lack the `relation' parameter. This + * parameter allows you to specify what relation the element you + * provide has to the element you're looking for. 
This parameter + * can be: + * + * REL234_EQ - find only an element that compares equal to e + * REL234_LT - find the greatest element that compares < e + * REL234_LE - find the greatest element that compares <= e + * REL234_GT - find the smallest element that compares > e + * REL234_GE - find the smallest element that compares >= e + * + * Non-`rel' variants assume REL234_EQ. + * + * If `rel' is REL234_GT or REL234_LT, the `e' parameter may be + * NULL. In this case, REL234_GT will return the smallest element + * in the tree, and REL234_LT will return the greatest. This gives + * an alternative means of iterating over a sorted tree, instead of + * using index234: + * + * // to loop forwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_GT)) != NULL ;) + * consume(p); + * + * // to loop backwards + * for (p = NULL; (p = findrel234(tree, p, NULL, REL234_LT)) != NULL ;) + * consume(p); + */ +enum { + REL234_EQ, REL234_LT, REL234_LE, REL234_GT, REL234_GE +}; +void *find234(tree234 * t, void *e, cmpfn234 cmp); +void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation); +void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index); +void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp, int relation, + int *index); + +/* + * Delete an element e in a 2-3-4 tree. Does not free the element, + * merely removes all links to it from the tree nodes. + * + * delpos234 deletes the element at a particular tree index: it + * works on both sorted and unsorted trees. + * + * del234 deletes the element passed to it, so it only works on + * sorted trees. (It's equivalent to using findpos234 to determine + * the index of an element, and then passing that index to + * delpos234.) + * + * Both functions return a pointer to the element they delete, for + * the user to free or pass on elsewhere or whatever. If the index + * is out of range (delpos234) or the element is already not in the + * tree (del234) then they return NULL. 
+ */ +void *del234(tree234 * t, void *e); +void *delpos234(tree234 * t, int index); + +/* + * Return the total element count of a tree234. + */ +int count234(tree234 * t); + +/* + * Split a tree234 into two valid tree234s. + * + * splitpos234 splits at a given index. If `before' is TRUE, the + * items at and after that index are left in t and the ones before + * are returned; if `before' is FALSE, the items before that index + * are left in t and the rest are returned. + * + * split234 splits at a given key. You can pass any of the + * relations used with findrel234, except for REL234_EQ. The items + * in the tree that satisfy the relation are returned; the + * remainder are left. + */ +tree234 *splitpos234(tree234 * t, int index, int before); +tree234 *split234(tree234 * t, void *e, cmpfn234 cmp, int rel); + +/* + * Join two tree234s together into a single one. + * + * All the elements in t1 are placed to the left of all the + * elements in t2. If the trees are sorted, there will be a test to + * ensure that this satisfies the ordering criterion, and NULL will + * be returned otherwise. If the trees are unsorted, there is no + * restriction on the use of join234. + * + * The tree returned is t1 (join234) or t2 (join234r), if the + * operation is successful. + */ +tree234 *join234(tree234 * t1, tree234 * t2); +tree234 *join234r(tree234 * t1, tree234 * t2); + +/* + * Make a complete copy of a tree234. Element pointers will be + * reused unless copyfn is non-NULL, in which case it will be used + * to copy each element. (copyfn takes two `void *' parameters; the + * first is private state and the second is the element. A simple + * copy routine probably won't need private state.) 
+ */ +tree234 *copytree234(tree234 * t, copyfn234 copyfn, void *copyfnstate); + +#endif /* TREE234_H */ diff --git a/Docs/src/bin/halibut/ustring.c b/Docs/src/bin/halibut/ustring.c index 8811546..9ab4ba6 100755 --- a/Docs/src/bin/halibut/ustring.c +++ b/Docs/src/bin/halibut/ustring.c @@ -1,201 +1,201 @@ -/*
- * ustring.c: Unicode string routines
- */
-
-#include <wchar.h>
-#include <time.h>
-#include "halibut.h"
-
-wchar_t *ustrdup(wchar_t * s)
-{
- wchar_t *r;
- if (s)
- {
- r = mknewa(wchar_t, 1 + ustrlen(s));
- ustrcpy(r, s);
- } else
- {
- r = mknew(wchar_t);
- *r = 0;
- }
- return r;
-}
-
-char *ustrtoa(wchar_t * s, char *outbuf, int size)
-{
- char *p;
- if (!s)
- {
- *outbuf = '\0';
- return outbuf;
- }
- for (p = outbuf; *s && p < outbuf + size; p++, s++)
- *p = *(char*)s;
- if (p < outbuf + size)
- *p = '\0';
- else
- outbuf[size - 1] = '\0';
- return outbuf;
-}
-
-int ustrlen(wchar_t * s)
-{
- int len = 0;
- while (*s++)
- len++;
- return len;
-}
-
-wchar_t *uadv(wchar_t * s)
-{
- return s + 1 + ustrlen(s);
-}
-
-wchar_t *ustrcpy(wchar_t * dest, wchar_t * source)
-{
- wchar_t *ret = dest;
- do
- {
- *dest++ = *source;
- }
- while (*source++);
- return ret;
-}
-
-int ustrcmp(wchar_t * lhs, wchar_t * rhs)
-{
- if (!lhs && !rhs)
- return 0;
- if (!lhs)
- return -1;
- if (!rhs)
- return +1;
- while (*lhs && *rhs && *lhs == *rhs)
- lhs++, rhs++;
- if (*lhs < *rhs)
- return -1;
- else if (*lhs > *rhs)
- return 1;
- return 0;
-}
-
-wchar_t utolower(wchar_t c)
-{
- if (c == L'\0')
- return c; /* this property needed by ustricmp */
- /* FIXME: this doesn't even come close */
- if (c >= 'A' && c <= 'Z')
- c += 'a' - 'A';
- return c;
-}
-
-int ustricmp(wchar_t * lhs, wchar_t * rhs)
-{
- wchar_t lc, rc;
- while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
- lhs++, rhs++;
- if (!lc && !rc)
- return 0;
- if (lc < rc)
- return -1;
- else
- return 1;
-}
-
-wchar_t *ustrlow(wchar_t * s)
-{
- wchar_t *p = s;
- while (*p)
- {
- *p = utolower(*p);
- p++;
- }
- return s;
-}
-
-int utoi(wchar_t * s)
-{
- int sign = +1;
- int n;
-
- if (*s == L'-')
- {
- s++;
- sign = -1;
- }
-
- n = 0;
- while (*s && *s >= L'0' && *s <= L'9')
- {
- n *= 10;
- n += (*s - '0');
- s++;
- }
-
- return n;
-}
-
-int utob(wchar_t * s)
-{
- if (!ustricmp(s, L"yes") || !ustricmp(s, L"y") ||
- !ustricmp(s, L"true") || !ustricmp(s, L"t"))
- return TRUE;
- return FALSE;
-}
-
-int uisdigit(wchar_t c)
-{
- return c >= L'0' && c <= L'9';
-}
-
-#define USTRFTIME_DELTA 128
-wchar_t *ustrftime(wchar_t * wfmt, struct tm * timespec)
-{
- void *blk = NULL;
- wchar_t *wblk, *wp;
- char *fmt, *text, *p;
- size_t size = 0;
- size_t len;
-
- /*
- * strftime has the entertaining property that it returns 0
- * _either_ on out-of-space _or_ on successful generation of
- * the empty string. Hence we must ensure our format can never
- * generate the empty string. Somebody throw a custard pie at
- * whoever was responsible for that. Please?
- */
- if (wfmt)
- {
- len = ustrlen(wfmt);
- fmt = mknewa(char, 2 + len);
- ustrtoa(wfmt, fmt + 1, len + 1);
- fmt[0] = ' ';
- } else
- fmt = " %c";
-
- while (1)
- {
- size += USTRFTIME_DELTA;
- blk = resize((char *) blk, size);
- len = strftime((char *) blk, size - 1, fmt, timespec);
- if (len > 0)
- break;
- }
-
- /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
- wblk = resize((wchar_t *) blk, len);
- text = mknewa(char, len);
- strftime(text, len, fmt + 1, timespec);
- /*
- * We operate in the C locale, so this all ought to be kosher
- * ASCII. If we ever move outside ASCII machines, we may need
- * to make this more portable...
- */
- for (wp = wblk, p = text; *p; p++, wp++)
- *wp = *p;
- *wp = 0;
- if (wfmt)
- sfree(fmt);
- sfree(text);
- return wblk;
-}
+/* + * ustring.c: Unicode string routines + */ + +#include <wchar.h> +#include <time.h> +#include "halibut.h" + +wchar_t *ustrdup(wchar_t * s) +{ + wchar_t *r; + if (s) + { + r = mknewa(wchar_t, 1 + ustrlen(s)); + ustrcpy(r, s); + } else + { + r = mknew(wchar_t); + *r = 0; + } + return r; +} + +char *ustrtoa(wchar_t * s, char *outbuf, int size) +{ + char *p; + if (!s) + { + *outbuf = '\0'; + return outbuf; + } + for (p = outbuf; *s && p < outbuf + size; p++, s++) + *p = *(char*)s; + if (p < outbuf + size) + *p = '\0'; + else + outbuf[size - 1] = '\0'; + return outbuf; +} + +int ustrlen(wchar_t * s) +{ + int len = 0; + while (*s++) + len++; + return len; +} + +wchar_t *uadv(wchar_t * s) +{ + return s + 1 + ustrlen(s); +} + +wchar_t *ustrcpy(wchar_t * dest, wchar_t * source) +{ + wchar_t *ret = dest; + do + { + *dest++ = *source; + } + while (*source++); + return ret; +} + +int ustrcmp(wchar_t * lhs, wchar_t * rhs) +{ + if (!lhs && !rhs) + return 0; + if (!lhs) + return -1; + if (!rhs) + return +1; + while (*lhs && *rhs && *lhs == *rhs) + lhs++, rhs++; + if (*lhs < *rhs) + return -1; + else if (*lhs > *rhs) + return 1; + return 0; +} + +wchar_t utolower(wchar_t c) +{ + if (c == L'\0') + return c; /* this property needed by ustricmp */ + /* FIXME: this doesn't even come close */ + if (c >= 'A' && c <= 'Z') + c += 'a' - 'A'; + return c; +} + +int ustricmp(wchar_t * lhs, wchar_t * rhs) +{ + wchar_t lc, rc; + while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) + lhs++, rhs++; + if (!lc && !rc) + return 0; + if (lc < rc) + return -1; + else + return 1; +} + +wchar_t *ustrlow(wchar_t * s) +{ + wchar_t *p = s; + while (*p) + { + *p = utolower(*p); + p++; + } + return s; +} + +int utoi(wchar_t * s) +{ + int sign = +1; + int n; + + if (*s == L'-') + { + s++; + sign = -1; + } + + n = 0; + while (*s && *s >= L'0' && *s <= L'9') + { + n *= 10; + n += (*s - '0'); + s++; + } + + return n; +} + +int utob(wchar_t * s) +{ + if (!ustricmp(s, L"yes") || !ustricmp(s, 
L"y") || + !ustricmp(s, L"true") || !ustricmp(s, L"t")) + return TRUE; + return FALSE; +} + +int uisdigit(wchar_t c) +{ + return c >= L'0' && c <= L'9'; +} + +#define USTRFTIME_DELTA 128 +wchar_t *ustrftime(wchar_t * wfmt, struct tm * timespec) +{ + void *blk = NULL; + wchar_t *wblk, *wp; + char *fmt, *text, *p; + size_t size = 0; + size_t len; + + /* + * strftime has the entertaining property that it returns 0 + * _either_ on out-of-space _or_ on successful generation of + * the empty string. Hence we must ensure our format can never + * generate the empty string. Somebody throw a custard pie at + * whoever was responsible for that. Please? + */ + if (wfmt) + { + len = ustrlen(wfmt); + fmt = mknewa(char, 2 + len); + ustrtoa(wfmt, fmt + 1, len + 1); + fmt[0] = ' '; + } else + fmt = " %c"; + + while (1) + { + size += USTRFTIME_DELTA; + blk = resize((char *) blk, size); + len = strftime((char *) blk, size - 1, fmt, timespec); + if (len > 0) + break; + } + + /* Note: +1 for the terminating 0, -1 for the initial space in fmt */ + wblk = resize((wchar_t *) blk, len); + text = mknewa(char, len); + strftime(text, len, fmt + 1, timespec); + /* + * We operate in the C locale, so this all ought to be kosher + * ASCII. If we ever move outside ASCII machines, we may need + * to make this more portable... + */ + for (wp = wblk, p = text; *p; p++, wp++) + *wp = *p; + *wp = 0; + if (wfmt) + sfree(fmt); + sfree(text); + return wblk; +} diff --git a/Docs/src/bin/halibut/version.c b/Docs/src/bin/halibut/version.c index e59e97c..634bedc 100755 --- a/Docs/src/bin/halibut/version.c +++ b/Docs/src/bin/halibut/version.c @@ -1,13 +1,13 @@ -/*
- * version.c: version string
- */
-
-#include <stdio.h>
-
-#ifndef VERSION
-#define VER "anonymous build (" __DATE__ " " __TIME__ ")"
-#else
-#define VER "version " VERSION
-#endif
-
-const char *const version = "version 1.0 (NSIS Custom Build)";
+/* + * version.c: version string + */ + +#include <stdio.h> + +#ifndef VERSION +#define VER "anonymous build (" __DATE__ " " __TIME__ ")" +#else +#define VER "version " VERSION +#endif + +const char *const version = "version 1.0 (NSIS Custom Build)"; |