diff options
author | Colin Watson <cjwatson@debian.org> | 2022-10-01 23:03:41 +0100 |
---|---|---|
committer | Colin Watson <cjwatson@debian.org> | 2022-10-01 23:03:42 +0100 |
commit | e8d93bc31a15ff87b5872cc76ff292206a6eaaa1 (patch) | |
tree | 854c7d5bc25e851a82a3ec9e12ef32f1b9ed3930 | |
parent | 0beeda0969bf8d9e70d08be0948c40688aa39a15 (diff) |
test_manfile: Remove "already exists" check
This produced inconsistent results in some unusual cases. For example,
if `/usr/share/man/man5/inetd.conf.5.gz` was a symlink to
`/usr/share/man/man8/inetd.8.gz` and had `NAME` entries for both `inetd`
and `inetd.conf`, then if `inetd.8.gz` was scanned first we would
include a `WHATIS_MAN` entry in section 8 for `inetd.conf`, while if
`inetd.conf.5.gz` was scanned first we'd skip that due to hitting the
already-exists check when scanning `inetd.8.gz`.
This was originally a performance optimization. Now that we cache the
results of `ult_src` and `find_name`, this optimization no longer helps
us much, so it can go.
* src/check_mandirs.c (test_manfile): Don't return early if the page
already exists with matching details.
* NEWS.md: Document this.
-rw-r--r-- | NEWS.md | 5 | ||||
-rw-r--r-- | src/check_mandirs.c | 55 |
2 files changed, 21 insertions, 39 deletions
@@ -13,8 +13,9 @@ Fixes: the correct type. * Store links in the database using the section and extension of the link rather than of the ultimate source file. - * Only skip adding a page to the database due to an existing entry if the - entry represents an ultimate source. + * Consider pages for adding to the database even if they seem to already + exist; this performance optimization is no longer needed due to caching, + and it produced inconsistent results in some unusual cases. Improvements: diff --git a/src/check_mandirs.c b/src/check_mandirs.c index 3e4e5464..33fbaace 100644 --- a/src/check_mandirs.c +++ b/src/check_mandirs.c @@ -193,49 +193,30 @@ void test_manfile (MYDBM_FILE dbf, const char *file, const char *path) return; } - /* See if we already have it, before going any further. This will - * save both an ult_src() and a find_name(), amongst other wastes of - * time. + /* Check for multiple pages whose details match except for having + * different compression extensions. */ exists = dblookup_exact (dbf, manpage_base, info->ext, true); + if (exists && !STREQ (exists->comp, info->comp ? info->comp : "-")) { + char *abs_filename; - /* Ensure we really have the actual page. Gzip keeps the mtime the - * same when it compresses, so we have to compare compression - * extensions as well. - */ - if (exists) { - if (strcmp (exists->comp, - info->comp ? info->comp : "-") == 0) { - if (timespec_cmp (exists->mtime, info->mtime) == 0 && - exists->id == ULT_MAN) { - debug ("test_manfile: already exists\n"); - free_mandata_struct (exists); - free_mandata_struct (info); - return; - } + /* If the cached file still exists, then we have a collision: + * two pages that only differ by compression extension. + */ + abs_filename = make_filename (path, NULL, exists, "man"); + if (!abs_filename) { + if (!opt_test) + dbdelete (dbf, manpage_base, exists); } else { - char *abs_filename; - - /* see if the cached file actually exists. It's - evident at this point that we have multiple - comp extensions */ - abs_filename = make_filename (path, NULL, - exists, "man"); - if (!abs_filename) { - if (!opt_test) - dbdelete (dbf, manpage_base, exists); - } else { - gripe_multi_extensions (path, exists->sec, - manpage_base, - exists->ext); - free (abs_filename); - free_mandata_struct (exists); - free_mandata_struct (info); - return; - } + gripe_multi_extensions (path, exists->sec, + manpage_base, exists->ext); + free (abs_filename); + free_mandata_struct (exists); + free_mandata_struct (info); + return; } - free_mandata_struct (exists); } + free_mandata_struct (exists); /* Trace the file to its ultimate source, otherwise we'll be * looking for whatis info in files containing only '.so |