summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2024-04-18 14:23:05 -0400
committerJoey Hess <joeyh@joeyh.name>2024-04-18 14:23:38 -0400
commitc410b2bb73345c19d18c7664fd9f48192c4aaa3a (patch)
treeb73f2fd5220fe02f020277d1d6d40b8d6ee00df8
parentb700c48b1551597d6ea82930f576841902e97178 (diff)
annex.maxextensions configuration
Controls how many filename extensions to preserve. Sponsored-by: the NIH-funded NICEMAN (ReproNim TR&D3) project
-rw-r--r--Annex/View.hs2
-rw-r--r--Annex/View/ViewedFile.hs21
-rw-r--r--Backend/Hash.hs15
-rw-r--r--Backend/Utilities.hs17
-rw-r--r--CHANGELOG2
-rw-r--r--Types/GitConfig.hs2
-rw-r--r--doc/git-annex.mdwn12
-rw-r--r--doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn2
-rw-r--r--doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment2
9 files changed, 52 insertions, 23 deletions
diff --git a/Annex/View.hs b/Annex/View.hs
index 7372287380..482ce17c3a 100644
--- a/Annex/View.hs
+++ b/Annex/View.hs
@@ -387,7 +387,7 @@ prop_view_roundtrips (AssociatedFile Nothing) _ _ = True
prop_view_roundtrips (AssociatedFile (Just f)) metadata visible = or
[ B.null (P.takeFileName f) && B.null (P.takeDirectory f)
, viewTooLarge view
- , all hasfields (viewedFiles view (viewedFileFromReference' Nothing) (fromRawFilePath f) metadata)
+ , all hasfields (viewedFiles view (viewedFileFromReference' Nothing Nothing) (fromRawFilePath f) metadata)
]
where
view = View (Git.Ref "foo") $
diff --git a/Annex/View/ViewedFile.hs b/Annex/View/ViewedFile.hs
index 6aa992babb..84dcbc897a 100644
--- a/Annex/View/ViewedFile.hs
+++ b/Annex/View/ViewedFile.hs
@@ -1,6 +1,6 @@
{- filenames (not paths) used in views
-
- - Copyright 2014-2023 Joey Hess <id@joeyh.name>
+ - Copyright 2014-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -19,6 +19,7 @@ module Annex.View.ViewedFile (
import Annex.Common
import Utility.QuickCheck
+import Backend.Utilities (maxExtensions)
import qualified Data.ByteString as S
@@ -37,10 +38,12 @@ type MkViewedFile = FilePath -> ViewedFile
- So, from dir/subdir/file.foo, generate file_%dir%subdir%.foo
-}
viewedFileFromReference :: GitConfig -> MkViewedFile
-viewedFileFromReference g = viewedFileFromReference' (annexMaxExtensionLength g)
+viewedFileFromReference g = viewedFileFromReference'
+ (annexMaxExtensionLength g)
+ (annexMaxExtensions g)
-viewedFileFromReference' :: Maybe Int -> MkViewedFile
-viewedFileFromReference' maxextlen f = concat $
+viewedFileFromReference' :: Maybe Int -> Maybe Int -> MkViewedFile
+viewedFileFromReference' maxextlen maxextensions f = concat $
[ escape (fromRawFilePath base')
, if null dirs then "" else "_%" ++ intercalate "%" (map escape dirs) ++ "%"
, escape $ fromRawFilePath $ S.concat extensions'
@@ -51,11 +54,12 @@ viewedFileFromReference' maxextlen f = concat $
(base, extensions) = case maxextlen of
Nothing -> splitShortExtensions (toRawFilePath basefile')
Just n -> splitShortExtensions' (n+1) (toRawFilePath basefile')
- {- Limit to two extensions maximum. -}
+ {- Limit number of extensions. -}
+ maxextensions' = fromMaybe maxExtensions maxextensions
(base', extensions')
- | length extensions <= 2 = (base, extensions)
+ | length extensions <= maxextensions' = (base, extensions)
| otherwise =
- let (es,more) = splitAt 2 (reverse extensions)
+ let (es,more) = splitAt maxextensions' (reverse extensions)
in (base <> mconcat (reverse more), reverse es)
{- On Windows, if the filename looked like "dir/c:foo" then
- basefile would look like it contains a drive letter, which will
@@ -101,7 +105,8 @@ prop_viewedFile_roundtrips tf
-- Relative filenames wanted, not directories.
| any (isPathSeparator) (end f ++ beginning f) = True
| isAbsolute f || isDrive f = True
- | otherwise = dir == dirFromViewedFile (viewedFileFromReference' Nothing f)
+ | otherwise = dir == dirFromViewedFile
+ (viewedFileFromReference' Nothing Nothing f)
where
f = fromTestableFilePath tf
dir = joinPath $ beginning $ splitDirectories f
diff --git a/Backend/Hash.hs b/Backend/Hash.hs
index 9768550adf..fc0a8b8591 100644
--- a/Backend/Hash.hs
+++ b/Backend/Hash.hs
@@ -170,11 +170,14 @@ needsUpgrade key = or
]
trivialMigrate :: Key -> Backend -> AssociatedFile -> Bool -> Annex (Maybe Key)
-trivialMigrate oldkey newbackend afile _inannex = trivialMigrate' oldkey newbackend afile
- <$> (annexMaxExtensionLength <$> Annex.getGitConfig)
-
-trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Key
-trivialMigrate' oldkey newbackend afile maxextlen
+trivialMigrate oldkey newbackend afile _inannex = do
+ c <- Annex.getGitConfig
+ return $ trivialMigrate' oldkey newbackend afile
+ (annexMaxExtensionLength c)
+ (annexMaxExtensions c)
+
+trivialMigrate' :: Key -> Backend -> AssociatedFile -> Maybe Int -> Maybe Int -> Maybe Key
+trivialMigrate' oldkey newbackend afile maxextlen maxexts
{- Fast migration from hashE to hash backend. -}
| migratable && hasExt oldvariety = Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort (keyHash oldkey)
@@ -185,7 +188,7 @@ trivialMigrate' oldkey newbackend afile maxextlen
AssociatedFile Nothing -> Nothing
AssociatedFile (Just file) -> Just $ alterKey oldkey $ \d -> d
{ keyName = S.toShort $ keyHash oldkey
- <> selectExtension maxextlen file
+ <> selectExtension maxextlen maxexts file
, keyVariety = newvariety
}
{- Upgrade to fix bad previous migration that created a
diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs
index 3b68eed624..304cfaac16 100644
--- a/Backend/Utilities.hs
+++ b/Backend/Utilities.hs
@@ -45,20 +45,24 @@ genKeyName s
- file that the key was generated from. -}
addE :: KeySource -> (KeyVariety -> KeyVariety) -> Key -> Annex Key
addE source sethasext k = do
- maxlen <- annexMaxExtensionLength <$> Annex.getGitConfig
- let ext = selectExtension maxlen (keyFilename source)
+ c <- Annex.getGitConfig
+ let ext = selectExtension
+ (annexMaxExtensionLength c)
+ (annexMaxExtensions c)
+ (keyFilename source)
return $ alterKey k $ \d -> d
{ keyName = keyName d <> S.toShort ext
, keyVariety = sethasext (keyVariety d)
}
-selectExtension :: Maybe Int -> RawFilePath -> S.ByteString
-selectExtension maxlen f
+selectExtension :: Maybe Int -> Maybe Int -> RawFilePath -> S.ByteString
+selectExtension maxlen maxextensions f
| null es = ""
| otherwise = S.intercalate "." ("":es)
where
es = filter (not . S.null) $ reverse $
- take 2 $ filter (S.all validInExtension) $
+ take (fromMaybe maxExtensions maxextensions) $
+ filter (S.all validInExtension) $
takeWhile shortenough $
reverse $ S.split (fromIntegral (ord '.')) (P.takeExtensions f')
shortenough e = S.length e <= fromMaybe maxExtensionLen maxlen
@@ -75,3 +79,6 @@ validInExtension c
maxExtensionLen :: Int
maxExtensionLen = 4 -- long enough for "jpeg"
+
+maxExtensions :: Int
+maxExtensions = 2 -- include both extensions of "tar.gz"
diff --git a/CHANGELOG b/CHANGELOG
index 5e73fd0dac..735d81282c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -18,6 +18,8 @@ git-annex (10.20240228) UNRELEASED; urgency=medium
* Added rclone special remote, which can be used without needing
to install the git-annex-remote-rclone program. This needs
a new version of rclone, which supports "rclone gitannex".
+ * annex.maxextensions configuration controls how many filename
+ extensions to preserve.
-- Joey Hess <id@joeyh.name> Tue, 27 Feb 2024 13:07:10 -0400
diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs
index f97ee52192..26540b8484 100644
--- a/Types/GitConfig.hs
+++ b/Types/GitConfig.hs
@@ -136,6 +136,7 @@ data GitConfig = GitConfig
, annexAllowedIPAddresses :: String
, annexAllowUnverifiedDownloads :: Bool
, annexMaxExtensionLength :: Maybe Int
+ , annexMaxExtensions :: Maybe Int
, annexJobs :: Concurrency
, annexCacheCreds :: Bool
, annexAutoUpgradeRepository :: Bool
@@ -244,6 +245,7 @@ extractGitConfig configsource r = GitConfig
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe (annexConfig "security.allow-unverified-downloads")
, annexMaxExtensionLength = getmayberead (annexConfig "maxextensionlength")
+ , annexMaxExtensions = getmayberead (annexConfig "maxextensions")
, annexJobs = fromMaybe NonConcurrent $
parseConcurrency =<< getmaybe (annexConfig "jobs")
, annexCacheCreds = getbool (annexConfig "cachecreds") True
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 60acd0573c..59dacb0229 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -873,8 +873,16 @@ repository, using [[git-annex-config]]. See its man page for a list.)
and also when generating a view branch.
The default length is 4, which allows extensions like "jpeg". The dot before
- the extension is not counted part of its length. At most two extensions
- at the end of a filename will be preserved, e.g. .gz or .tar.gz .
+ the extension is not counted as part of its length.
+
+* `annex.maxextensions`
+
+ Maximum number of filename extensions to preserve when using a backend
+ that preserves filename extensions, and also when generating a view
+ branch.
+
+ The default is 2, which allows for compound extensions like ".tar.gz".
+ When set to 1, it will only preserve the last extension, e.g. ".gz".
* `annex.diskreserve`
diff --git a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn
index 83e957c8a8..1c2e8dfd93 100644
--- a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn
+++ b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__.mdwn
@@ -12,3 +12,5 @@ Just throwing against the wall to see if sticks
[[!meta author=yoh]]
[[!tag projects/repronim]]
+
+> added annex.maxextensions config, [[done]] --[[Joey]]
diff --git a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment
index 55d8038989..1bc0b84b5d 100644
--- a/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment
+++ b/doc/todo/way_to_instruct_on_how_to_decide_on_extension__63__/comment_1_b3e9fcb09b6455301a5f7a9bf50a8a49._comment
@@ -9,7 +9,7 @@ extension. For a .mkv file, I'd guess most video players don't care about
the extension.
annex.maxextensionlength won't help here, but I think it makes sense to add
-an analagous annex.maxextensioncount which would default to 2 (as it
+an analogous annex.maxextensions which would default to 2 (as it
currently does to handle .tar.gz) but you could set to 1.
It might also be a reasonable argument that filename extensions are not