diff options
author | Colin Watson <cjwatson@debian.org> | 2022-01-17 00:50:05 +0000 |
---|---|---|
committer | Colin Watson <cjwatson@debian.org> | 2022-01-17 00:50:05 +0000 |
commit | dd637d7cf962cd464f127efcb69dede6adf297e6 (patch) | |
tree | 1ef76468fc178e47a6e118632e114b28df7437e2 | |
parent | 83b0600bd452740e357ddd925a4927b96dd9ce5a (diff) |
Avoid libpipeline linkage in libman
Since libpipeline is only used by one function in libman
(check_preprocessor_encoding), and not all of man-db's tools need
libpipeline in their own right (e.g. accessdb), it seems worth pushing
that function up a layer, out of libman and into src/.
* lib/encodings.c (struct conversion_entry, conversion_table,
convert_encoding, check_preprocessor_encoding): Move to ...
* src/manconv.c: ... here.
* lib/encodings.h (check_preprocessor_encoding): Move to ...
* src/manconv.h: ... here.
* lib/Makefile.am (libman_la_CPPFLAGS): Remove $(libpipeline_CFLAGS).
(libman_la_LDFLAGS): Remove $(libpipeline_LIBS).
* src/man-recode.c, src/man.c, src/manconv.c: Include manconv.h.
-rw-r--r-- | lib/Makefile.am | 2 | ||||
-rw-r--r-- | lib/encodings.c | 156 | ||||
-rw-r--r-- | lib/encodings.h | 4 | ||||
-rw-r--r-- | src/man-recode.c | 1 | ||||
-rw-r--r-- | src/man.c | 1 | ||||
-rw-r--r-- | src/manconv.c | 157 | ||||
-rw-r--r-- | src/manconv.h | 2 |
7 files changed, 160 insertions, 163 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am index 72ce5ad2..7ced9a2c 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -29,7 +29,6 @@ libman_la_CPPFLAGS = \ -I$(top_srcdir)/gl/lib \ -I$(top_builddir)/gl/lib \ -DLOCALEDIR=\"$(localedir)\" \ - $(libpipeline_CFLAGS) \ $(libseccomp_CFLAGS) libman_la_SOURCES = \ @@ -69,5 +68,4 @@ libman_la_LIBADD = ../gl/lib/libgnu.la $(LTLIBOBJS) \ libman_la_LDFLAGS = \ -avoid-version -release $(VERSION) -rpath $(pkglibdir) \ -no-undefined \ - $(libpipeline_LIBS) \ $(libseccomp_LIBS) diff --git a/lib/encodings.c b/lib/encodings.c index c3373ac2..0db5b8c0 100644 --- a/lib/encodings.c +++ b/lib/encodings.c @@ -29,7 +29,6 @@ #include <string.h> #include <stdlib.h> #include <unistd.h> -#include <assert.h> #include <locale.h> #include <ctype.h> @@ -38,7 +37,6 @@ #include "localcharset.h" #include "xalloc.h" #include "xstrndup.h" -#include "xvasprintf.h" #include "manconfig.h" @@ -328,59 +326,6 @@ static struct less_charset_entry less_charset_table[] = { static const char fallback_less_charset[] = "iso8859"; -/* Encoding conversions from groff-1.20/src/preproc/preconv/preconv.cpp. - * I've only included those not already recognised by GNU libiconv. 
- */ -struct conversion_entry { - const char *from; - const char *to; -}; - -static struct conversion_entry conversion_table[] = { - { "chinese-big5", "Big5" }, - { "chinese-euc", "GB2312" }, - { "chinese-iso-8bit", "GB2312" }, - { "cn-gb-2312", "GB2312" }, - { "cp878", "KOI8-R" }, - { "cyrillic-iso-8bit", "ISO-8859-5" }, - { "cyrillic-koi8", "KOI8-R" }, - { "euc-china", "GB2312" }, - { "euc-japan", "EUC-JP" }, - { "euc-japan-1990", "EUC-JP" }, - { "euc-kr", "EUC-KR" }, - { "greek-iso-8bit", "ISO-8859-7" }, - { "iso-latin-1", "ISO-8859-1" }, - { "iso-latin-2", "ISO-8859-2" }, - { "iso-latin-5", "ISO-8859-9" }, - { "iso-latin-7", "ISO-8859-13" }, - { "iso-latin-9", "ISO-8859-15" }, - { "japanese-iso-8bit", "EUC-JP" }, - { "japanese-euc", "EUC-JP" }, - { "jis8", "EUC-JP" }, - { "korean-euc", "EUC-KR" }, - { "korean-iso-8bit", "EUC-KR" }, - { "latin-0", "ISO-8859-15" }, - { "latin-1", "ISO-8859-1" }, - { "latin-2", "ISO-8859-2" }, - { "latin-5", "ISO-8859-9" }, - { "latin-7", "ISO-8859-13" }, - { "mule-utf-16", "UTF-16" }, - { "mule-utf-16be", "UTF-16BE" }, - { "mule-utf-16-be", "UTF-16BE" }, - { "mule-utf-16be-with-signature", "UTF-16" }, - { "mule-utf-16le", "UTF-16LE" }, - { "mule-utf-16-le", "UTF-16LE" }, - { "mule-utf-16le-with-signature", "UTF-16" }, - { "mule-utf-8", "UTF-8" }, - { "utf-16-be", "UTF-16BE" }, - { "utf-16be-with-signature", "UTF-16" }, - { "utf-16-be-with-signature", "UTF-16" }, - { "utf-16-le", "UTF-16LE" }, - { "utf-16le-with-signature", "UTF-16" }, - { "utf-16-le-with-signature", "UTF-16" }, - { NULL, NULL } -}; - const char *groff_preconv = NULL; /* Is the groff "preconv" helper available? If so, return its name. @@ -833,104 +778,3 @@ const char * ATTRIBUTE_PURE get_jless_charset (const char *charset_from_locale) return NULL; } - -/* Convert Emacs-style coding tags to ones that libiconv understands. 
*/ -static char *convert_encoding (char *encoding) -{ - size_t encoding_len = strlen (encoding); - const struct conversion_entry *entry; - -#define STRIP(s, l) do { \ - if (encoding_len > (l) && \ - !strcasecmp (encoding + encoding_len - (l), (s))) \ - encoding[encoding_len - (l)] = '\0'; \ -} while (0) - - STRIP ("-dos", 4); - STRIP ("-mac", 4); - STRIP ("-unix", 5); - -#undef STRIP - - for (entry = conversion_table; entry->from; ++entry) - if (!strcasecmp (entry->from, encoding)) { - free (encoding); - return xstrdup (entry->to); - } - - return encoding; -} - -/* Inspect the first line of data in a pipeline for preprocessor encoding - * declarations. - * - * If to_encoding and modified_line are both non-NULL, and if the encoding - * declaration in the input does not match to_encoding, then return an - * encoding declaration line modified to refer to the given to_encoding in - * *modified_line. The caller should free *modified_line. - */ -char *check_preprocessor_encoding (pipeline *p, const char *to_encoding, - char **modified_line) -{ - char *pp_encoding = NULL; - const char *line = pipeline_peekline (p); - const char *directive = NULL, *directive_end = NULL, *pp_search = NULL; - size_t pp_encoding_len = 0; - - /* Some people use .\" incorrectly. We allow it for encoding - * declarations but not for preprocessor declarations. - */ - if (line && - (STRNEQ (line, PP_COOKIE, 4) || STRNEQ (line, ".\\\" ", 4))) { - const char *newline = strchr (line, '\n'); - - directive = line + 4; - directive_end = newline ? 
newline : strchr (directive, '\0'); - pp_search = memmem (directive, directive_end - directive, - "-*-", 3); - } - - if (directive && pp_search) { - pp_search += 3; - while (pp_search && pp_search < directive_end && *pp_search) { - while (*pp_search == ' ') - ++pp_search; - if (STRNEQ (pp_search, "coding:", 7)) { - const char *pp_encoding_allow; - pp_search += 7; - while (*pp_search == ' ') - ++pp_search; - pp_encoding_allow = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789-_/:.()"; - pp_encoding_len = strspn (pp_search, - pp_encoding_allow); - pp_encoding = xstrndup (pp_search, - pp_encoding_len); - pp_encoding = convert_encoding (pp_encoding); - debug ("preprocessor encoding: %s\n", - pp_encoding); - break; - } else { - pp_search = memchr (pp_search, ';', - directive_end - pp_search); - if (pp_search) - ++pp_search; - } - } - } - - if (to_encoding && modified_line && - pp_encoding && strcasecmp (pp_encoding, to_encoding)) { - assert (directive_end); - assert (pp_search); - *modified_line = xasprintf - ("%.*s%s%.*s\n", - (int) (pp_search - line), line, - to_encoding, - (int) (directive_end - (pp_search + pp_encoding_len)), - pp_search + pp_encoding_len); - } - - return pp_encoding; -} diff --git a/lib/encodings.h b/lib/encodings.h index 6f07f0c6..6de48f72 100644 --- a/lib/encodings.h +++ b/lib/encodings.h @@ -22,8 +22,6 @@ #include <stdbool.h> -struct pipeline; - const char *get_groff_preconv (void); char *get_page_encoding (const char *lang); const char *get_source_encoding (const char *lang); @@ -38,5 +36,3 @@ const char *get_roff_encoding (const char *device, const char *get_output_encoding (const char *device); const char *get_less_charset (const char *locale_charset); const char *get_jless_charset (const char *locale_charset); -char *check_preprocessor_encoding (struct pipeline *p, const char *to_code, - char **modified_line); diff --git a/src/man-recode.c b/src/man-recode.c index 8ce975e3..fd5380fe 100644 --- a/src/man-recode.c +++ 
b/src/man-recode.c @@ -60,6 +60,7 @@ #include "compression.h" #include "decompress.h" +#include "manconv.h" #include "manconv_client.h" int quiet = 0; @@ -109,6 +109,7 @@ #include "ult_src.h" #include "manp.h" #include "zsoelim.h" +#include "manconv.h" #include "manconv_client.h" #ifdef MAN_OWNER diff --git a/src/manconv.c b/src/manconv.c index 01f329cf..c3afa32f 100644 --- a/src/manconv.c +++ b/src/manconv.c @@ -33,6 +33,7 @@ # include "config.h" #endif /* HAVE_CONFIG_H */ +#include <assert.h> #include <stdio.h> #include <errno.h> #include <stdlib.h> @@ -51,6 +52,7 @@ #include "gl_list.h" #include "xalloc.h" #include "xstrndup.h" +#include "xvasprintf.h" #include "gettext.h" #include <locale.h> @@ -61,11 +63,164 @@ #include "pipeline.h" #include "debug.h" -#include "encodings.h" #include "glcontainers.h" #include "manconv.h" +/* Encoding conversions from groff-1.20/src/preproc/preconv/preconv.cpp. + * I've only included those not already recognised by GNU libiconv. + */ +struct conversion_entry { + const char *from; + const char *to; +}; + +static struct conversion_entry conversion_table[] = { + { "chinese-big5", "Big5" }, + { "chinese-euc", "GB2312" }, + { "chinese-iso-8bit", "GB2312" }, + { "cn-gb-2312", "GB2312" }, + { "cp878", "KOI8-R" }, + { "cyrillic-iso-8bit", "ISO-8859-5" }, + { "cyrillic-koi8", "KOI8-R" }, + { "euc-china", "GB2312" }, + { "euc-japan", "EUC-JP" }, + { "euc-japan-1990", "EUC-JP" }, + { "euc-kr", "EUC-KR" }, + { "greek-iso-8bit", "ISO-8859-7" }, + { "iso-latin-1", "ISO-8859-1" }, + { "iso-latin-2", "ISO-8859-2" }, + { "iso-latin-5", "ISO-8859-9" }, + { "iso-latin-7", "ISO-8859-13" }, + { "iso-latin-9", "ISO-8859-15" }, + { "japanese-iso-8bit", "EUC-JP" }, + { "japanese-euc", "EUC-JP" }, + { "jis8", "EUC-JP" }, + { "korean-euc", "EUC-KR" }, + { "korean-iso-8bit", "EUC-KR" }, + { "latin-0", "ISO-8859-15" }, + { "latin-1", "ISO-8859-1" }, + { "latin-2", "ISO-8859-2" }, + { "latin-5", "ISO-8859-9" }, + { "latin-7", "ISO-8859-13" }, + { 
"mule-utf-16", "UTF-16" }, + { "mule-utf-16be", "UTF-16BE" }, + { "mule-utf-16-be", "UTF-16BE" }, + { "mule-utf-16be-with-signature", "UTF-16" }, + { "mule-utf-16le", "UTF-16LE" }, + { "mule-utf-16-le", "UTF-16LE" }, + { "mule-utf-16le-with-signature", "UTF-16" }, + { "mule-utf-8", "UTF-8" }, + { "utf-16-be", "UTF-16BE" }, + { "utf-16be-with-signature", "UTF-16" }, + { "utf-16-be-with-signature", "UTF-16" }, + { "utf-16-le", "UTF-16LE" }, + { "utf-16le-with-signature", "UTF-16" }, + { "utf-16-le-with-signature", "UTF-16" }, + { NULL, NULL } +}; + +/* Convert Emacs-style coding tags to ones that libiconv understands. */ +static char *convert_encoding (char *encoding) +{ + size_t encoding_len = strlen (encoding); + const struct conversion_entry *entry; + +#define STRIP(s, l) do { \ + if (encoding_len > (l) && \ + !strcasecmp (encoding + encoding_len - (l), (s))) \ + encoding[encoding_len - (l)] = '\0'; \ +} while (0) + + STRIP ("-dos", 4); + STRIP ("-mac", 4); + STRIP ("-unix", 5); + +#undef STRIP + + for (entry = conversion_table; entry->from; ++entry) + if (!strcasecmp (entry->from, encoding)) { + free (encoding); + return xstrdup (entry->to); + } + + return encoding; +} + +/* Inspect the first line of data in a pipeline for preprocessor encoding + * declarations. + * + * If to_encoding and modified_line are both non-NULL, and if the encoding + * declaration in the input does not match to_encoding, then return an + * encoding declaration line modified to refer to the given to_encoding in + * *modified_line. The caller should free *modified_line. + */ +char *check_preprocessor_encoding (pipeline *p, const char *to_encoding, + char **modified_line) +{ + char *pp_encoding = NULL; + const char *line = pipeline_peekline (p); + const char *directive = NULL, *directive_end = NULL, *pp_search = NULL; + size_t pp_encoding_len = 0; + + /* Some people use .\" incorrectly. We allow it for encoding + * declarations but not for preprocessor declarations. 
+ */ + if (line && + (STRNEQ (line, PP_COOKIE, 4) || STRNEQ (line, ".\\\" ", 4))) { + const char *newline = strchr (line, '\n'); + + directive = line + 4; + directive_end = newline ? newline : strchr (directive, '\0'); + pp_search = memmem (directive, directive_end - directive, + "-*-", 3); + } + + if (directive && pp_search) { + pp_search += 3; + while (pp_search && pp_search < directive_end && *pp_search) { + while (*pp_search == ' ') + ++pp_search; + if (STRNEQ (pp_search, "coding:", 7)) { + const char *pp_encoding_allow; + pp_search += 7; + while (*pp_search == ' ') + ++pp_search; + pp_encoding_allow = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789-_/:.()"; + pp_encoding_len = strspn (pp_search, + pp_encoding_allow); + pp_encoding = xstrndup (pp_search, + pp_encoding_len); + pp_encoding = convert_encoding (pp_encoding); + debug ("preprocessor encoding: %s\n", + pp_encoding); + break; + } else { + pp_search = memchr (pp_search, ';', + directive_end - pp_search); + if (pp_search) + ++pp_search; + } + } + } + + if (to_encoding && modified_line && + pp_encoding && strcasecmp (pp_encoding, to_encoding)) { + assert (directive_end); + assert (pp_search); + *modified_line = xasprintf + ("%.*s%s%.*s\n", + (int) (pp_search - line), line, + to_encoding, + (int) (directive_end - (pp_search + pp_encoding_len)), + pp_search + pp_encoding_len); + } + + return pp_encoding; +} + #ifdef HAVE_ICONV /* When converting text containing an invalid multibyte sequence to diff --git a/src/manconv.h b/src/manconv.h index ad8dd99a..5d3de1de 100644 --- a/src/manconv.h +++ b/src/manconv.h @@ -24,4 +24,6 @@ struct pipeline; +char *check_preprocessor_encoding (struct pipeline *p, const char *to_code, + char **modified_line); void manconv (struct pipeline *p, gl_list_t from, const char *to); |