summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorColin Watson <cjwatson@debian.org>2022-10-01 23:03:41 +0100
committerColin Watson <cjwatson@debian.org>2022-10-01 23:03:42 +0100
commite8d93bc31a15ff87b5872cc76ff292206a6eaaa1 (patch)
tree854c7d5bc25e851a82a3ec9e12ef32f1b9ed3930
parent0beeda0969bf8d9e70d08be0948c40688aa39a15 (diff)
test_manfile: Remove "already exists" check
This produced inconsistent results in some unusual cases. For example, if `/usr/share/man/man5/inetd.conf.5.gz` was a symlink to `/usr/share/man/man8/inetd.8.gz` and had `NAME` entries for both `inetd` and `inetd.conf`, then if `inetd.8.gz` was scanned first we would include a `WHATIS_MAN` entry in section 8 for `inetd.conf`, while if `inetd.conf.5.gz` was scanned first we'd skip that due to hitting the already-exists check when scanning `inetd.8.gz`. This was originally a performance optimization. Now that we cache the results of `ult_src` and `find_name`, this optimization no longer helps us much, so it can go. * src/check_mandirs.c (test_manfile): Don't return early if the page already exists with matching details. * NEWS.md: Document this.
-rw-r--r--NEWS.md5
-rw-r--r--src/check_mandirs.c55
2 files changed, 21 insertions, 39 deletions
diff --git a/NEWS.md b/NEWS.md
index af7c534f..72fb1f0c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -13,8 +13,9 @@ Fixes:
the correct type.
* Store links in the database using the section and extension of the link
rather than of the ultimate source file.
- * Only skip adding a page to the database due to an existing entry if the
- entry represents an ultimate source.
+ * Consider pages for adding to the database even if they seem to already
+ exist; this performance optimization is no longer needed due to caching,
+ and it produced inconsistent results in some unusual cases.
Improvements:
diff --git a/src/check_mandirs.c b/src/check_mandirs.c
index 3e4e5464..33fbaace 100644
--- a/src/check_mandirs.c
+++ b/src/check_mandirs.c
@@ -193,49 +193,30 @@ void test_manfile (MYDBM_FILE dbf, const char *file, const char *path)
return;
}
- /* See if we already have it, before going any further. This will
- * save both an ult_src() and a find_name(), amongst other wastes of
- * time.
+ /* Check for multiple pages whose details match except for having
+ * different compression extensions.
*/
exists = dblookup_exact (dbf, manpage_base, info->ext, true);
+ if (exists && !STREQ (exists->comp, info->comp ? info->comp : "-")) {
+ char *abs_filename;
- /* Ensure we really have the actual page. Gzip keeps the mtime the
- * same when it compresses, so we have to compare compression
- * extensions as well.
- */
- if (exists) {
- if (strcmp (exists->comp,
- info->comp ? info->comp : "-") == 0) {
- if (timespec_cmp (exists->mtime, info->mtime) == 0 &&
- exists->id == ULT_MAN) {
- debug ("test_manfile: already exists\n");
- free_mandata_struct (exists);
- free_mandata_struct (info);
- return;
- }
+ /* If the cached file still exists, then we have a collision:
+ * two pages that only differ by compression extension.
+ */
+ abs_filename = make_filename (path, NULL, exists, "man");
+ if (!abs_filename) {
+ if (!opt_test)
+ dbdelete (dbf, manpage_base, exists);
} else {
- char *abs_filename;
-
- /* see if the cached file actually exists. It's
- evident at this point that we have multiple
- comp extensions */
- abs_filename = make_filename (path, NULL,
- exists, "man");
- if (!abs_filename) {
- if (!opt_test)
- dbdelete (dbf, manpage_base, exists);
- } else {
- gripe_multi_extensions (path, exists->sec,
- manpage_base,
- exists->ext);
- free (abs_filename);
- free_mandata_struct (exists);
- free_mandata_struct (info);
- return;
- }
+ gripe_multi_extensions (path, exists->sec,
+ manpage_base, exists->ext);
+ free (abs_filename);
+ free_mandata_struct (exists);
+ free_mandata_struct (info);
+ return;
}
- free_mandata_struct (exists);
}
+ free_mandata_struct (exists);
/* Trace the file to its ultimate source, otherwise we'll be
* looking for whatis info in files containing only '.so