diff options
author | Joey Hess <joeyh@joeyh.name> | 2024-04-18 14:23:05 -0400 |
---|---|---|
committer | Joey Hess <joeyh@joeyh.name> | 2024-04-18 14:23:38 -0400 |
commit | c410b2bb73345c19d18c7664fd9f48192c4aaa3a (patch) | |
tree | b73f2fd5220fe02f020277d1d6d40b8d6ee00df8 | |
parent | b700c48b1551597d6ea82930f576841902e97178 (diff) |
annex.maxextensions configuration
Controls how many filename extensions to preserve.
Sponsored-by: the NIH-funded NICEMAN (ReproNim TR&D3) project
-rw-r--r-- | Annex/View.hs | 2 | ||||
-rw-r--r-- | Annex/View/ViewedFile.hs | 21 | ||||
-rw-r--r-- | Backend/Hash.hs | 15 | ||||
-rw-r--r-- | Backend/Utilities.hs | 17 | ||||
-rw-r--r-- | CHANGELOG | 2 | ||||
-rw-r--r-- | Types/GitConfig.hs | 2 | ||||
-rw-r--r-- | doc/git-annex.mdwn | 12 | ||||
-rw-r--r-- | doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn | 2 | ||||
-rw-r--r-- | doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment | 2 |
9 files changed, 52 insertions, 23 deletions
diff --git a/Annex/View.hs b/Annex/View.hs index 7372287380..482ce17c3a 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -387,7 +387,7 @@ prop_view_roundtrips (AssociatedFile Nothing) _ _ = True prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or [ B.null (P.takeFileName f) && B.null (P.takeDirectory f) , viewTooLarge view - , all hasfields (viewedFiles view (viewedFileFromReference' Nothing) (fromRawFilePath f) metadata) + , all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata) ] where view = View (Git.Ref "foo") $ diff --git a/Annex/View/ViewedFile.hs b/Annex/View/ViewedFile.hs index 6aa992babb..84dcbc897a 100644 --- a/Annex/View/ViewedFile.hs +++ b/Annex/View/ViewedFile.hs @@ -1,6 +1,6 @@ {- filenames (not paths) used in views - - - Copyright 2014-2023 Joey Hess <id@joeyh.name> + - Copyright 2014-2024 Joey Hess <id@joeyh.name> - - Licensed under the GNU AGPL version 3 or higher. -} @@ -19,6 +19,7 @@ module Annex.View.ViewedFile ( import Annex.Common import Utility.QuickCheck +import Backend.Utilities (maxExtensions) import qualified Data.ByteString as S @@ -37,10 +38,12 @@ type MkViewedFile = FilePath -> ViewedFile - So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo -} viewedFileFromReference :: GitConfig -> MkViewedFile -viewedFileFromReference g = viewedFileFromReference' (annexMaxExtensionLength g) +viewedFileFromReference g = viewedFileFromReference' + (annexMaxExtensionLength g) + (annexMaxExtensions g) -viewedFileFromReference' :: Maybe Int -> MkViewedFile -viewedFileFromReference' maxextlen f = concat $ +viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile +viewedFileFromReference' maxextlen maxextensions f = concat $ [ escape (fromRawFilePath base') , if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%" , escape $ fromRawFilePath $ S.concat extensions' @@ -51,11 +54,12 @@ viewedFileFromReference' maxextlen f = concat $ (base, extensions) = 
case maxextlen of Nothing -> splitShortExtensions (toRawFilePath basefile') Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile') - {- Limit to two extensions maximum. -} + {- Limit number of extensions. -} + maxextensions' = fromMaybe maxExtensions maxextensions (base', extensions') - | length extensions <= 2 = (base, extensions) + | length extensions <= maxextensions' = (base, extensions) | otherwise = - let (es,more) = splitAt 2 (reverse extensions) + let (es,more) = splitAt maxextensions' (reverse extensions) in (base <> mconcat (reverse more), reverse es) {- On Windows, if the filename looked like "dir/c:foo" then - basefile would look like it contains a drive letter, which will @@ -101,7 +105,8 @@ prop_viewedFile_roundtrips tf -- Relative filenames wanted, not directories. | any (isPathSeparator) (end f ++ beginning f) = True | isAbsolute f || isDrive f = True - | otherwise = dir == dirFromViewedFile (viewedFileFromReference' Nothing f) + | otherwise = dir == dirFromViewedFile + (viewedFileFromReference' Nothing Nothing f) where f = fromTestableFilePath tf dir = joinPath $ beginning $ splitDirectories f diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 9768550adf..fc0a8b8591 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -170,11 +170,14 @@ needsUpgrade key = or ] trivialMigrate :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key) -trivialMigrate oldkey newbackend afile _inannex = trivialMigrate' oldkey newbackend afile - <$> (annexMaxExtensionLength <$> Annex.getGitConfig) - -trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key -trivialMigrate' oldkey newbackend afile maxextlen +trivialMigrate oldkey newbackend afile _inannex = do + c <- Annex.getGitConfig + return $ trivialMigrate' oldkey newbackend afile + (annexMaxExtensionLength c) + (annexMaxExtensions c) + +trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Int -> Maybe Key +trivialMigrate' oldkey newbackend afile maxextlen 
maxexts {- Fast migration from hashE to hash backend. -} | migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d { keyName = S.toShort (keyHash oldkey) @@ -185,7 +188,7 @@ trivialMigrate' oldkey newbackend afile maxextlen AssociatedFile Nothing -> Nothing AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d { keyName = S.toShort $ keyHash oldkey - <> selectExtension maxextlen file + <> selectExtension maxextlen maxexts file , keyVariety = newvariety } {- Upgrade to fix bad previous migration that created a diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs index 3b68eed624..304cfaac16 100644 --- a/Backend/Utilities.hs +++ b/Backend/Utilities.hs @@ -45,20 +45,24 @@ genKeyName s - file that the key was generated from. -} addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key addE source sethasext k = do - maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig - let ext = selectExtension maxlen (keyFilename source) + c <- Annex.getGitConfig + let ext = selectExtension + (annexMaxExtensionLength c) + (annexMaxExtensions c) + (keyFilename source) return $ alterKey k $ \d -> d { keyName = keyName d <> S.toShort ext , keyVariety = sethasext (keyVariety d) } -selectExtension :: Maybe Int -> RawFilePath -> S.ByteString -selectExtension maxlen f +selectExtension :: Maybe Int -> Maybe Int -> RawFilePath -> S.ByteString +selectExtension maxlen maxextensions f | null es = "" | otherwise = S.intercalate "." ("":es) where es = filter (not . 
S.null) $ reverse $ - take 2 $ filter (S.all validInExtension) $ + take (fromMaybe maxExtensions maxextensions) $ + filter (S.all validInExtension) $ takeWhile shortenough $ reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f') shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen @@ -75,3 +79,6 @@ validInExtension c maxExtensionLen :: Int maxExtensionLen = 4 -- long enough for "jpeg" + +maxExtensions :: Int +maxExtensions = 2 -- include both extensions of "tar.gz" @@ -18,6 +18,8 @@ git-annex (10.20240228) UNRELEASED; urgency=medium * Added rclone special remote, which can be used without needing to install the git-annex-remote-rclone program. This needs a new version of rclone, which supports "rclone gitannex". + * annex.maxextensions configuration controls how many filename + extensions to preserve. -- Joey Hess <id@joeyh.name> Tue, 27 Feb 2024 13:07:10 -0400 diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index f97ee52192..26540b8484 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -136,6 +136,7 @@ data GitConfig = GitConfig , annexAllowedIPAddresses :: String , annexAllowUnverifiedDownloads :: Bool , annexMaxExtensionLength :: Maybe Int + , annexMaxExtensions :: Maybe Int , annexJobs :: Concurrency , annexCacheCreds :: Bool , annexAutoUpgradeRepository :: Bool @@ -244,6 +245,7 @@ extractGitConfig configsource r = GitConfig , annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe (annexConfig "security.allow-unverified-downloads") , annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength") + , annexMaxExtensions = getmayberead (annexConfig "maxextensions") , annexJobs = fromMaybe NonConcurrent $ parseConcurrency =<< getmaybe (annexConfig "jobs") , annexCacheCreds = getbool (annexConfig "cachecreds") True diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 60acd0573c..59dacb0229 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -873,8 +873,16 @@ repository, using [[git-annex-config]]. 
See its man page for a list.) and also when generating a view branch. The default length is 4, which allows extensions like "jpeg". The dot before - the extension is not counted part of its length. At most two extensions - at the end of a filename will be preserved, e.g. .gz or .tar.gz . + the extension is not counted part of its length. + +* `annex.maxextensions` + + Maximum number of filename extensions to preserve when using a backend + that preserves filename extensions, and also when generating a view + branch. + + The default is 2, which allows for compound extensions like ".tar.gz". + When set to 1, it will only preserve the last extension, eg ".gz". * `annex.diskreserve` diff --git a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn index 83e957c8a8..1c2e8dfd93 100644 --- a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn +++ b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn @@ -12,3 +12,5 @@ Just throwing against the wall to see if sticks [[!meta author=yoh]] [[!tag projects/repronim]] + +> added annex.maxextensions config, [[done]] --[[Joey]] diff --git a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment index 55d8038989..1bc0b84b5d 100644 --- a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment +++ b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment @@ -9,7 +9,7 @@ extension. For a .mkv file, I'd guess most video players don't care about the extension. 
annex.maxextensionlength won't help here, but I think it makes sense to add -an analagous annex.maxextensioncount which would default to 2 (as it +an analogous annex.maxextensions which would default to 2 (as it currently does to handle .tar.gz) but you could set to 1. It might also be a reasonable argument that filename extensions are not |