summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Colin Watson <cjwatson@debian.org> 2022-01-17 00:50:05 +0000
committer Colin Watson <cjwatson@debian.org> 2022-01-17 00:50:05 +0000
commit dd637d7cf962cd464f127efcb69dede6adf297e6 (patch)
tree 1ef76468fc178e47a6e118632e114b28df7437e2
parent 83b0600bd452740e357ddd925a4927b96dd9ce5a (diff)
Avoid libpipeline linkage in libman
Since it's only used by one function, and not all of man-db's tools need libpipeline in their own right (e.g. accessdb), it seems worth pushing this up a layer.

* lib/encodings.c (struct conversion_entry, conversion_table, convert_encoding, check_preprocessor_encoding): Move to ...
* src/manconv.c: ... here.
* lib/encodings.h (check_preprocessor_encoding): Move to ...
* src/manconv.h: ... here.
* lib/Makefile.am (libman_la_CPPFLAGS): Remove $(libpipeline_CFLAGS).
(libman_la_LDFLAGS): Remove $(libpipeline_LIBS).
* src/man-recode.c, src/man.c, src/manconv.c: Include manconv.h.
-rw-r--r-- lib/Makefile.am 2
-rw-r--r-- lib/encodings.c 156
-rw-r--r-- lib/encodings.h 4
-rw-r--r-- src/man-recode.c 1
-rw-r--r-- src/man.c 1
-rw-r--r-- src/manconv.c 157
-rw-r--r-- src/manconv.h 2
7 files changed, 160 insertions, 163 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 72ce5ad2..7ced9a2c 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -29,7 +29,6 @@ libman_la_CPPFLAGS = \
-I$(top_srcdir)/gl/lib \
-I$(top_builddir)/gl/lib \
-DLOCALEDIR=\"$(localedir)\" \
- $(libpipeline_CFLAGS) \
$(libseccomp_CFLAGS)
libman_la_SOURCES = \
@@ -69,5 +68,4 @@ libman_la_LIBADD = ../gl/lib/libgnu.la $(LTLIBOBJS) \
libman_la_LDFLAGS = \
-avoid-version -release $(VERSION) -rpath $(pkglibdir) \
-no-undefined \
- $(libpipeline_LIBS) \
$(libseccomp_LIBS)
diff --git a/lib/encodings.c b/lib/encodings.c
index c3373ac2..0db5b8c0 100644
--- a/lib/encodings.c
+++ b/lib/encodings.c
@@ -29,7 +29,6 @@
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
-#include <assert.h>
#include <locale.h>
#include <ctype.h>
@@ -38,7 +37,6 @@
#include "localcharset.h"
#include "xalloc.h"
#include "xstrndup.h"
-#include "xvasprintf.h"
#include "manconfig.h"
@@ -328,59 +326,6 @@ static struct less_charset_entry less_charset_table[] = {
static const char fallback_less_charset[] = "iso8859";
-/* Encoding conversions from groff-1.20/src/preproc/preconv/preconv.cpp.
- * I've only included those not already recognised by GNU libiconv.
- */
-struct conversion_entry {
- const char *from;
- const char *to;
-};
-
-static struct conversion_entry conversion_table[] = {
- { "chinese-big5", "Big5" },
- { "chinese-euc", "GB2312" },
- { "chinese-iso-8bit", "GB2312" },
- { "cn-gb-2312", "GB2312" },
- { "cp878", "KOI8-R" },
- { "cyrillic-iso-8bit", "ISO-8859-5" },
- { "cyrillic-koi8", "KOI8-R" },
- { "euc-china", "GB2312" },
- { "euc-japan", "EUC-JP" },
- { "euc-japan-1990", "EUC-JP" },
- { "euc-kr", "EUC-KR" },
- { "greek-iso-8bit", "ISO-8859-7" },
- { "iso-latin-1", "ISO-8859-1" },
- { "iso-latin-2", "ISO-8859-2" },
- { "iso-latin-5", "ISO-8859-9" },
- { "iso-latin-7", "ISO-8859-13" },
- { "iso-latin-9", "ISO-8859-15" },
- { "japanese-iso-8bit", "EUC-JP" },
- { "japanese-euc", "EUC-JP" },
- { "jis8", "EUC-JP" },
- { "korean-euc", "EUC-KR" },
- { "korean-iso-8bit", "EUC-KR" },
- { "latin-0", "ISO-8859-15" },
- { "latin-1", "ISO-8859-1" },
- { "latin-2", "ISO-8859-2" },
- { "latin-5", "ISO-8859-9" },
- { "latin-7", "ISO-8859-13" },
- { "mule-utf-16", "UTF-16" },
- { "mule-utf-16be", "UTF-16BE" },
- { "mule-utf-16-be", "UTF-16BE" },
- { "mule-utf-16be-with-signature", "UTF-16" },
- { "mule-utf-16le", "UTF-16LE" },
- { "mule-utf-16-le", "UTF-16LE" },
- { "mule-utf-16le-with-signature", "UTF-16" },
- { "mule-utf-8", "UTF-8" },
- { "utf-16-be", "UTF-16BE" },
- { "utf-16be-with-signature", "UTF-16" },
- { "utf-16-be-with-signature", "UTF-16" },
- { "utf-16-le", "UTF-16LE" },
- { "utf-16le-with-signature", "UTF-16" },
- { "utf-16-le-with-signature", "UTF-16" },
- { NULL, NULL }
-};
-
const char *groff_preconv = NULL;
/* Is the groff "preconv" helper available? If so, return its name.
@@ -833,104 +778,3 @@ const char * ATTRIBUTE_PURE get_jless_charset (const char *charset_from_locale)
return NULL;
}
-
-/* Convert Emacs-style coding tags to ones that libiconv understands. */
-static char *convert_encoding (char *encoding)
-{
- size_t encoding_len = strlen (encoding);
- const struct conversion_entry *entry;
-
-#define STRIP(s, l) do { \
- if (encoding_len > (l) && \
- !strcasecmp (encoding + encoding_len - (l), (s))) \
- encoding[encoding_len - (l)] = '\0'; \
-} while (0)
-
- STRIP ("-dos", 4);
- STRIP ("-mac", 4);
- STRIP ("-unix", 5);
-
-#undef STRIP
-
- for (entry = conversion_table; entry->from; ++entry)
- if (!strcasecmp (entry->from, encoding)) {
- free (encoding);
- return xstrdup (entry->to);
- }
-
- return encoding;
-}
-
-/* Inspect the first line of data in a pipeline for preprocessor encoding
- * declarations.
- *
- * If to_encoding and modified_line are both non-NULL, and if the encoding
- * declaration in the input does not match to_encoding, then return an
- * encoding declaration line modified to refer to the given to_encoding in
- * *modified_line. The caller should free *modified_line.
- */
-char *check_preprocessor_encoding (pipeline *p, const char *to_encoding,
- char **modified_line)
-{
- char *pp_encoding = NULL;
- const char *line = pipeline_peekline (p);
- const char *directive = NULL, *directive_end = NULL, *pp_search = NULL;
- size_t pp_encoding_len = 0;
-
- /* Some people use .\" incorrectly. We allow it for encoding
- * declarations but not for preprocessor declarations.
- */
- if (line &&
- (STRNEQ (line, PP_COOKIE, 4) || STRNEQ (line, ".\\\" ", 4))) {
- const char *newline = strchr (line, '\n');
-
- directive = line + 4;
- directive_end = newline ? newline : strchr (directive, '\0');
- pp_search = memmem (directive, directive_end - directive,
- "-*-", 3);
- }
-
- if (directive && pp_search) {
- pp_search += 3;
- while (pp_search && pp_search < directive_end && *pp_search) {
- while (*pp_search == ' ')
- ++pp_search;
- if (STRNEQ (pp_search, "coding:", 7)) {
- const char *pp_encoding_allow;
- pp_search += 7;
- while (*pp_search == ' ')
- ++pp_search;
- pp_encoding_allow = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789-_/:.()";
- pp_encoding_len = strspn (pp_search,
- pp_encoding_allow);
- pp_encoding = xstrndup (pp_search,
- pp_encoding_len);
- pp_encoding = convert_encoding (pp_encoding);
- debug ("preprocessor encoding: %s\n",
- pp_encoding);
- break;
- } else {
- pp_search = memchr (pp_search, ';',
- directive_end - pp_search);
- if (pp_search)
- ++pp_search;
- }
- }
- }
-
- if (to_encoding && modified_line &&
- pp_encoding && strcasecmp (pp_encoding, to_encoding)) {
- assert (directive_end);
- assert (pp_search);
- *modified_line = xasprintf
- ("%.*s%s%.*s\n",
- (int) (pp_search - line), line,
- to_encoding,
- (int) (directive_end - (pp_search + pp_encoding_len)),
- pp_search + pp_encoding_len);
- }
-
- return pp_encoding;
-}
diff --git a/lib/encodings.h b/lib/encodings.h
index 6f07f0c6..6de48f72 100644
--- a/lib/encodings.h
+++ b/lib/encodings.h
@@ -22,8 +22,6 @@
#include <stdbool.h>
-struct pipeline;
-
const char *get_groff_preconv (void);
char *get_page_encoding (const char *lang);
const char *get_source_encoding (const char *lang);
@@ -38,5 +36,3 @@ const char *get_roff_encoding (const char *device,
const char *get_output_encoding (const char *device);
const char *get_less_charset (const char *locale_charset);
const char *get_jless_charset (const char *locale_charset);
-char *check_preprocessor_encoding (struct pipeline *p, const char *to_code,
- char **modified_line);
diff --git a/src/man-recode.c b/src/man-recode.c
index 8ce975e3..fd5380fe 100644
--- a/src/man-recode.c
+++ b/src/man-recode.c
@@ -60,6 +60,7 @@
#include "compression.h"
#include "decompress.h"
+#include "manconv.h"
#include "manconv_client.h"
int quiet = 0;
diff --git a/src/man.c b/src/man.c
index 7069b52f..f707eea3 100644
--- a/src/man.c
+++ b/src/man.c
@@ -109,6 +109,7 @@
#include "ult_src.h"
#include "manp.h"
#include "zsoelim.h"
+#include "manconv.h"
#include "manconv_client.h"
#ifdef MAN_OWNER
diff --git a/src/manconv.c b/src/manconv.c
index 01f329cf..c3afa32f 100644
--- a/src/manconv.c
+++ b/src/manconv.c
@@ -33,6 +33,7 @@
# include "config.h"
#endif /* HAVE_CONFIG_H */
+#include <assert.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
@@ -51,6 +52,7 @@
#include "gl_list.h"
#include "xalloc.h"
#include "xstrndup.h"
+#include "xvasprintf.h"
#include "gettext.h"
#include <locale.h>
@@ -61,11 +63,164 @@
#include "pipeline.h"
#include "debug.h"
-#include "encodings.h"
#include "glcontainers.h"
#include "manconv.h"
+/* Encoding conversions from groff-1.20/src/preproc/preconv/preconv.cpp.
+ * I've only included those not already recognised by GNU libiconv.
+ *
+ * Each entry pairs one alias (from) with the name that replaces it (to).
+ */
+struct conversion_entry {
+ const char *from; /* alias; convert_encoding compares it with strcasecmp */
+ const char *to; /* replacement name; convert_encoding returns a copy of it */
+};
+
+static struct conversion_entry conversion_table[] = { /* scanned in order by convert_encoding */
+ { "chinese-big5", "Big5" },
+ { "chinese-euc", "GB2312" },
+ { "chinese-iso-8bit", "GB2312" },
+ { "cn-gb-2312", "GB2312" },
+ { "cp878", "KOI8-R" },
+ { "cyrillic-iso-8bit", "ISO-8859-5" },
+ { "cyrillic-koi8", "KOI8-R" },
+ { "euc-china", "GB2312" },
+ { "euc-japan", "EUC-JP" },
+ { "euc-japan-1990", "EUC-JP" },
+ { "euc-kr", "EUC-KR" },
+ { "greek-iso-8bit", "ISO-8859-7" },
+ { "iso-latin-1", "ISO-8859-1" },
+ { "iso-latin-2", "ISO-8859-2" },
+ { "iso-latin-5", "ISO-8859-9" },
+ { "iso-latin-7", "ISO-8859-13" },
+ { "iso-latin-9", "ISO-8859-15" },
+ { "japanese-iso-8bit", "EUC-JP" },
+ { "japanese-euc", "EUC-JP" },
+ { "jis8", "EUC-JP" },
+ { "korean-euc", "EUC-KR" },
+ { "korean-iso-8bit", "EUC-KR" },
+ { "latin-0", "ISO-8859-15" },
+ { "latin-1", "ISO-8859-1" },
+ { "latin-2", "ISO-8859-2" },
+ { "latin-5", "ISO-8859-9" },
+ { "latin-7", "ISO-8859-13" },
+ { "mule-utf-16", "UTF-16" },
+ { "mule-utf-16be", "UTF-16BE" },
+ { "mule-utf-16-be", "UTF-16BE" },
+ { "mule-utf-16be-with-signature", "UTF-16" },
+ { "mule-utf-16le", "UTF-16LE" },
+ { "mule-utf-16-le", "UTF-16LE" },
+ { "mule-utf-16le-with-signature", "UTF-16" },
+ { "mule-utf-8", "UTF-8" },
+ { "utf-16-be", "UTF-16BE" },
+ { "utf-16be-with-signature", "UTF-16" },
+ { "utf-16-be-with-signature", "UTF-16" },
+ { "utf-16-le", "UTF-16LE" },
+ { "utf-16le-with-signature", "UTF-16" },
+ { "utf-16-le-with-signature", "UTF-16" },
+ { NULL, NULL } /* end marker: the lookup loop stops when from is NULL */
+};
+
+/* Convert Emacs-style coding tags to ones that libiconv understands.
+ *
+ * encoding is modified in place: a trailing -dos, -mac or -unix (compared
+ * without regard to case) is cut off, provided something remains in front
+ * of it.  The length is measured once, before any suffix is removed, so
+ * all three suffix tests look at the tail of the original string.
+ *
+ * If the result then matches the from field of a conversion_table entry
+ * (again ignoring case), encoding is freed and a copy of that entry's to
+ * field made with xstrdup is returned.  Otherwise encoding itself is
+ * returned.  Because encoding may be passed to free, it must be a string
+ * for which that is valid.
+ */
+static char *convert_encoding (char *encoding)
+{
+ size_t encoding_len = strlen (encoding);
+ const struct conversion_entry *entry;
+
+#define STRIP(s, l) do { \
+ if (encoding_len > (l) && \
+ !strcasecmp (encoding + encoding_len - (l), (s))) \
+ encoding[encoding_len - (l)] = '\0'; \
+} while (0)
+
+ STRIP ("-dos", 4);
+ STRIP ("-mac", 4);
+ STRIP ("-unix", 5);
+
+#undef STRIP
+
+ for (entry = conversion_table; entry->from; ++entry)
+ if (!strcasecmp (entry->from, encoding)) {
+ free (encoding); /* the input is replaced by a fresh copy below */
+ return xstrdup (entry->to);
+ }
+
+ return encoding; /* no table entry matched: hand back the input string */
+}
+
+/* Inspect the first line of data in a pipeline for preprocessor encoding
+ * declarations.
+ *
+ * If to_encoding and modified_line are both non-NULL, and if the encoding
+ * declaration in the input does not match to_encoding, then return an
+ * encoding declaration line modified to refer to the given to_encoding in
+ * *modified_line. The caller should free *modified_line.
+ *
+ * The line is obtained with pipeline_peekline and is only examined when it
+ * starts with one of the two 4-character prefixes tested below.  The
+ * declaration is looked for after the first -*- marker, among fields
+ * separated by semicolons; the first field starting with coding: is used.
+ * The comparison against to_encoding ignores case and is made after the
+ * declared name has been passed through convert_encoding.
+ *
+ * Returns that converted name, or NULL when no coding: field is found.
+ * *modified_line is written only in the mismatch case described above;
+ * otherwise it is left untouched.
+ * NOTE(review): the non-NULL result is allocated here and nothing in this
+ * function frees it; presumably the caller does -- confirm.
+ */
+char *check_preprocessor_encoding (pipeline *p, const char *to_encoding,
+ char **modified_line)
+{
+ char *pp_encoding = NULL;
+ const char *line = pipeline_peekline (p);
+ const char *directive = NULL, *directive_end = NULL, *pp_search = NULL;
+ size_t pp_encoding_len = 0;
+
+ /* Some people use .\" incorrectly. We allow it for encoding
+ * declarations but not for preprocessor declarations.
+ */
+ if (line &&
+ (STRNEQ (line, PP_COOKIE, 4) || STRNEQ (line, ".\\\" ", 4))) {
+ const char *newline = strchr (line, '\n');
+
+ directive = line + 4; /* skip the 4-character prefix matched above */
+ directive_end = newline ? newline : strchr (directive, '\0');
+ pp_search = memmem (directive, directive_end - directive,
+ "-*-", 3);
+ }
+
+ if (directive && pp_search) {
+ pp_search += 3; /* step over the -*- marker */
+ while (pp_search && pp_search < directive_end && *pp_search) {
+ while (*pp_search == ' ')
+ ++pp_search;
+ if (STRNEQ (pp_search, "coding:", 7)) {
+ const char *pp_encoding_allow;
+ pp_search += 7; /* step over the coding: keyword */
+ while (*pp_search == ' ')
+ ++pp_search;
+ pp_encoding_allow = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789-_/:.()";
+ pp_encoding_len = strspn (pp_search,
+ pp_encoding_allow);
+ pp_encoding = xstrndup (pp_search,
+ pp_encoding_len);
+ pp_encoding = convert_encoding (pp_encoding);
+ debug ("preprocessor encoding: %s\n",
+ pp_encoding);
+ break; /* pp_search stays at the start of the declared name */
+ } else {
+ pp_search = memchr (pp_search, ';',
+ directive_end - pp_search);
+ if (pp_search)
+ ++pp_search; /* resume just after the separator */
+ }
+ }
+ }
+
+ if (to_encoding && modified_line &&
+ pp_encoding && strcasecmp (pp_encoding, to_encoding)) {
+ assert (directive_end);
+ assert (pp_search);
+ *modified_line = xasprintf
+ ("%.*s%s%.*s\n",
+ (int) (pp_search - line), line,
+ to_encoding,
+ (int) (directive_end - (pp_search + pp_encoding_len)),
+ pp_search + pp_encoding_len);
+ }
+
+ return pp_encoding;
+}
+
#ifdef HAVE_ICONV
/* When converting text containing an invalid multibyte sequence to
diff --git a/src/manconv.h b/src/manconv.h
index ad8dd99a..5d3de1de 100644
--- a/src/manconv.h
+++ b/src/manconv.h
@@ -24,4 +24,6 @@
struct pipeline;
+char *check_preprocessor_encoding (struct pipeline *p, const char *to_code,
+ char **modified_line);
void manconv (struct pipeline *p, gl_list_t from, const char *to);