summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoey Hess <joeyh@joeyh.name>2017-03-30 19:32:58 -0400
committerJoey Hess <joeyh@joeyh.name>2017-03-30 19:35:30 -0400
commitc3970f6c1a156a74b137f1d12c27cd70eed613c8 (patch)
tree5b3fe104bd75a42ef0db785cbcbea896bd75e29b
parent39e8433d461311cba2b5053a682fab93fdf00a9d (diff)
multicast: New command, uses uftp to multicast annexed files, for eg a classroom setting.
This commit was supported by the NSF-funded DataLad project.
-rw-r--r--Annex/Multicast.hs41
-rw-r--r--CHANGELOG2
-rw-r--r--CmdLine/GitAnnex.hs4
-rw-r--r--Command/Multicast.hs253
-rw-r--r--Logs.hs3
-rw-r--r--Logs/Multicast.hs33
-rw-r--r--Utility/FileMode.hs5
-rw-r--r--debian/control1
-rw-r--r--doc/git-annex-multicast.mdwn95
-rw-r--r--doc/git-annex.mdwn6
-rw-r--r--doc/internals.mdwn4
-rw-r--r--doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn3
-rw-r--r--git-annex.cabal6
13 files changed, 454 insertions, 2 deletions
diff --git a/Annex/Multicast.hs b/Annex/Multicast.hs
new file mode 100644
index 0000000000..16aa1bd335
--- /dev/null
+++ b/Annex/Multicast.hs
@@ -0,0 +1,41 @@
+{- git-annex multicast receive callback
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Annex.Multicast where
+
+import Config.Files
+import Utility.Env
+import Utility.PartialPrelude
+
+import System.Process
+import System.IO
+import GHC.IO.Handle.FD
+
+multicastReceiveEnv :: String
+multicastReceiveEnv = "GIT_ANNEX_MULTICAST_RECEIVE"
+
+multicastCallbackEnv :: IO (FilePath, [(String, String)], Handle)
+multicastCallbackEnv = do
+ gitannex <- readProgramFile
+ (rfd, wfd) <- createPipeFd
+ rh <- fdToHandle rfd
+ environ <- addEntry multicastReceiveEnv (show wfd) <$> getEnvironment
+ return (gitannex, environ, rh)
+
+-- This is run when uftpd has received a file. Rather than move
+-- the file into the annex here, which would require starting up the
+-- Annex monad, parsing git config, and verifying the content, simply
+-- output to the specified FD the filename. This keeps the time
+-- that uftpd is not receiving the next file as short as possible.
+runMulticastReceive :: [String] -> String -> IO ()
+runMulticastReceive ("-I":_sessionid:fs) hs = case readish hs of
+ Just fd -> do
+ h <- fdToHandle fd
+ mapM_ (hPutStrLn h) fs
+ hClose h
+ Nothing -> return ()
+runMulticastReceive _ _ = return ()
diff --git a/CHANGELOG b/CHANGELOG
index c58c3cf8a6..f69aadedd9 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ git-annex (6.20170322) UNRELEASED; urgency=medium
* When a http remote does not expose an annex.uuid config, only warn
about it once, not every time git-annex is run.
+ * multicast: New command, uses uftp to multicast annexed files, for eg
+ a classroom setting.
-- Joey Hess <id@joeyh.name> Wed, 29 Mar 2017 12:41:46 -0400
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index 0e472005c0..be5f56ba0a 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -14,6 +14,7 @@ import CmdLine
import Command
import Utility.Env
import Annex.Ssh
+import Annex.Multicast
import Types.Test
import qualified Command.Help
@@ -53,6 +54,7 @@ import qualified Command.Describe
import qualified Command.InitRemote
import qualified Command.EnableRemote
import qualified Command.EnableTor
+import qualified Command.Multicast
import qualified Command.Expire
import qualified Command.Repair
import qualified Command.Unused
@@ -144,6 +146,7 @@ cmds testoptparser testrunner =
, Command.InitRemote.cmd
, Command.EnableRemote.cmd
, Command.EnableTor.cmd
+ , Command.Multicast.cmd
, Command.Reinject.cmd
, Command.Unannex.cmd
, Command.Uninit.cmd
@@ -242,4 +245,5 @@ run testoptparser testrunner args = go envmodes
envmodes =
[ (sshOptionsEnv, runSshOptions args)
, (sshAskPassEnv, runSshAskPass)
+ , (multicastReceiveEnv, runMulticastReceive args)
]
diff --git a/Command/Multicast.hs b/Command/Multicast.hs
new file mode 100644
index 0000000000..cd74c3ebcf
--- /dev/null
+++ b/Command/Multicast.hs
@@ -0,0 +1,253 @@
+{- git-annex command
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+
+module Command.Multicast where
+
+import Command
+import Logs.Multicast
+import Annex.Multicast
+import Annex.WorkTree
+import Annex.Content
+import Annex.UUID
+#ifndef mingw32_HOST_OS
+import Creds
+import Annex.Perms
+import Utility.FileMode
+#endif
+import qualified Limit
+import Types.FileMatcher
+import qualified Git.LsFiles as LsFiles
+import Utility.Hash
+import Utility.Tmp
+import Config
+
+import Data.Char
+import qualified Data.ByteString.Lazy.UTF8 as B8
+import qualified Data.Map as M
+import Control.Concurrent.Async
+
+cmd :: Command
+cmd = command "multicast" SectionCommon "multicast file distribution"
+ paramNothing (seek <$$> optParser)
+
+data MultiCastAction
+ = GenAddress
+ | Send
+ | Receive
+ deriving (Show)
+
+data MultiCastOptions = MultiCastOptions MultiCastAction [CommandParam] [FilePath]
+ deriving (Show)
+
+optParser :: CmdParamsDesc -> Parser MultiCastOptions
+optParser _ = MultiCastOptions
+ <$> (genaddressp <|> sendp <|> receivep)
+ <*> many uftpopt
+ <*> cmdParams paramPaths
+ where
+ genaddressp = flag' GenAddress
+ ( long "gen-address"
+ <> help "generate multicast encryption key and store address in git-annex branch"
+ )
+ sendp = flag' Send
+ ( long "send"
+ <> help "multicast files"
+ )
+ receivep = flag' Receive
+ ( long "receive"
+ <> help "listen for multicast files and store in repository"
+ )
+ uftpopt = Param <$> strOption
+ ( long "uftp-opt"
+ <> short 'U'
+ <> help "passed on to uftp/uftpd"
+ <> metavar "OPTION"
+ )
+
+seek :: MultiCastOptions -> CommandSeek
+seek (MultiCastOptions GenAddress _ _) = commandAction genAddress
+seek (MultiCastOptions Send ups fs) = commandAction $ send ups fs
+seek (MultiCastOptions Receive ups []) = commandAction $ receive ups
+seek (MultiCastOptions Receive _ _) = giveup "Cannot specify list of files with --receive; this receives whatever files the sender chooses to send."
+
+genAddress :: CommandStart
+genAddress = do
+ showStart "gen-address" ""
+ k <- uftpKey
+ (s, ok) <- case k of
+ KeyContainer s -> liftIO $ genkey (Param s)
+ KeyFile f -> do
+ createAnnexDirectory (takeDirectory f)
+ liftIO $ nukeFile f
+ liftIO $ protectedOutput $ genkey (File f)
+ case (ok, parseFingerprint s) of
+ (False, _) -> giveup $ "uftp_keymgt failed: " ++ s
+ (_, Nothing) -> giveup $ "Failed to find fingerprint in uftp_keymgt output: " ++ s
+ (True, Just fp) -> next $ next $ do
+ recordFingerprint fp =<< getUUID
+ return True
+ where
+ -- Annoyingly, the fingerprint is output to stderr.
+ genkey p = processTranscript "uftp_keymgt" ps Nothing
+ where
+ ps = toCommand $
+ [ Param "-g"
+ , keyparam
+ , p
+ ]
+ -- uftp only supports rsa up to 2048 which is on the lower
+ -- limit of secure RSA key sizes. Instead, use an EC curve.
+ -- Except for on Windows XP, secp521r1 is supported on all
+ -- platforms by uftp. DJB thinks it's pretty good compared
+ -- with other NIST curves: "there's one standard NIST curve
+ -- using a nice prime, namely 2521−1 but the sheer size of this
+ -- prime makes it much slower than NIST P-256"
+ -- (http://blog.cr.yp.to/20140323-ecdsa.html)
+ -- Since this key is only used to set up the block encryption,
+ -- its slow speed is ok.
+ keyparam = Param "ec:secp521r1"
+
+parseFingerprint :: String -> Maybe Fingerprint
+parseFingerprint = Fingerprint <$$> lastMaybe . filter isfingerprint . words
+ where
+ isfingerprint s =
+ let os = filter (all isHexDigit) (splitc ':' s)
+ in length os == 20
+
+send :: [CommandParam] -> [FilePath] -> CommandStart
+send ups fs = withTmpFile "send" $ \t h -> do
+ -- Need to be able to send files with the names of git-annex
+ -- keys, and uftp does not allow renaming the files that are sent.
+ -- In a direct mode repository, the annex objects do not have
+ -- the names of keys, and would have to be copied, which is too
+ -- expensive.
+ whenM isDirect $
+ giveup "Sorry, multicast send cannot be done from a direct mode repository."
+
+ showStart "generating file list" ""
+ fs' <- seekHelper LsFiles.inRepo fs
+ matcher <- Limit.getMatcher
+ let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $
+ liftIO $ hPutStrLn h o
+ forM_ fs' $ \f -> do
+ mk <- lookupFile f
+ case mk of
+ Nothing -> noop
+ Just k -> withObjectLoc k (addlist f) (const noop)
+ liftIO $ hClose h
+ showEndOk
+
+ showStart "sending files" ""
+ showOutput
+ serverkey <- uftpKey
+ u <- getUUID
+ withAuthList $ \authlist -> do
+ let ps =
+ -- Force client authentication.
+ [ Param "-c"
+ , Param "-Y", Param "aes256-cbc"
+ , Param "-h", Param "sha512"
+ -- Picked ecdh_ecdsa for perfect forward secrecy,
+ -- and because a EC key exchange algorithm is
+ -- needed since all keys are EC.
+ , Param "-e", Param "ecdh_ecdsa"
+ , Param "-k", uftpKeyParam serverkey
+ , Param "-U", Param (uftpUID u)
+ -- only allow clients on the authlist
+ , Param "-H", Param ("@"++authlist)
+ -- pass in list of files to send
+ , Param "-i", File t
+ ] ++ ups
+ liftIO (boolSystem "uftp" ps) >>= showEndResult
+ stop
+
+receive :: [CommandParam] -> CommandStart
+receive ups = do
+ showStart "receiving multicast files" ""
+ showNote "Will continue to run until stopped by ctrl-c"
+
+ showOutput
+ clientkey <- uftpKey
+ u <- getUUID
+ (callback, environ, statush) <- liftIO multicastCallbackEnv
+ tmpobjdir <- fromRepo gitAnnexTmpObjectDir
+ createAnnexDirectory tmpobjdir
+ withTmpDirIn tmpobjdir "multicast" $ \tmpdir -> withAuthList $ \authlist ->
+ abstmpdir <- liftIO $ absPath tmpdir
+ abscallback <- liftIO $ searchPath callback
+ let ps =
+ -- Avoid it running as a daemon.
+ [ Param "-d"
+ -- Require encryption.
+ , Param "-E"
+ , Param "-k", uftpKeyParam clientkey
+ , Param "-U", Param (uftpUID u)
+ -- Only allow servers on the authlist
+ , Param "-S", Param authlist
+ -- Receive files into tmpdir
+ -- (it needs an absolute path)
+ , Param "-D", File abstmpdir
+ -- Run callback after each file received
+ -- (it needs an absolute path)
+ , Param "-s", Param (fromMaybe callback abscallback)
+ ] ++ ups
+ runner <- liftIO $ async $
+ hClose statush
+ `after` boolSystemEnv "uftpd" ps (Just environ)
+ mapM_ storeReceived . lines =<< liftIO (hGetContents statush)
+ showEndResult =<< liftIO (wait runner)
+ stop
+
+storeReceived :: FilePath -> Annex ()
+storeReceived f = do
+ case file2key (takeFileName f) of
+ Nothing -> do
+ warning $ "Received a file " ++ f ++ " that is not a git-annex key. Deleting this file."
+ liftIO $ nukeFile f
+ Just k -> void $
+ getViaTmp' AlwaysVerify k $ \dest -> unVerified $
+ liftIO $ catchBoolIO $ do
+ rename f dest
+ return True
+
+-- Under Windows, uftp uses key containers, which are not files on the
+-- filesystem.
+data UftpKey = KeyFile FilePath | KeyContainer String
+
+uftpKeyParam :: UftpKey -> CommandParam
+uftpKeyParam (KeyFile f) = File f
+uftpKeyParam (KeyContainer s) = Param s
+
+uftpKey :: Annex UftpKey
+#ifdef mingw32_HOST_OS
+uftpKey = do
+ u <- getUUID
+ return $ KeyContainer $ "annex-" ++ fromUUID u
+#else
+uftpKey = KeyFile <$> cacheCredsFile "multicast"
+#endif
+
+-- uftp needs a unique UID for each client and server, which
+-- is a 8 digit hex number in the form "0xnnnnnnnn"
+-- Derive it from the UUID.
+uftpUID :: UUID -> String
+uftpUID u = "0x" ++ (take 8 $ show $ sha2_256 $ B8.fromString (fromUUID u))
+
+withAuthList :: (FilePath -> Annex a) -> Annex a
+withAuthList a = do
+ m <- knownFingerPrints
+ withTmpFile "authlist" $ \t h -> do
+ liftIO $ hPutStr h (genAuthList m)
+ liftIO $ hClose h
+ a t
+
+genAuthList :: M.Map UUID Fingerprint -> String
+genAuthList = unlines . map fmt . M.toList
+ where
+ fmt (u, Fingerprint f) = uftpUID u ++ "|" ++ f
diff --git a/Logs.hs b/Logs.hs
index 38bd1c0683..716520af44 100644
--- a/Logs.hs
+++ b/Logs.hs
@@ -41,6 +41,7 @@ topLevelUUIDBasedLogs =
, scheduleLog
, activityLog
, differenceLog
+ , multicastLog
]
{- All the ways to get a key from a presence log file -}
@@ -93,6 +94,8 @@ activityLog = "activity.log"
differenceLog :: FilePath
differenceLog = "difference.log"
+multicastLog :: FilePath
+multicastLog = "multicast.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
diff --git a/Logs/Multicast.hs b/Logs/Multicast.hs
new file mode 100644
index 0000000000..386899fdf6
--- /dev/null
+++ b/Logs/Multicast.hs
@@ -0,0 +1,33 @@
+{- git-annex multicast fingerprint log
+ -
+ - Copyright 2017 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Logs.Multicast (
+ Fingerprint(..),
+ recordFingerprint,
+ knownFingerPrints,
+) where
+
+import Data.Time.Clock.POSIX
+
+import Annex.Common
+import qualified Annex.Branch
+import Logs
+import Logs.UUIDBased
+
+import qualified Data.Map as M
+
+newtype Fingerprint = Fingerprint String
+ deriving (Eq, Read, Show)
+
+recordFingerprint :: Fingerprint -> UUID -> Annex ()
+recordFingerprint fp uuid = do
+ ts <- liftIO getPOSIXTime
+ Annex.Branch.change multicastLog $
+ showLog show . changeLog ts uuid fp . parseLog readish
+
+knownFingerPrints :: Annex (M.Map UUID Fingerprint)
+knownFingerPrints = simpleMap . parseLog readish <$> Annex.Branch.get activityLog
diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs
index fe9cbf56a3..d9a269448f 100644
--- a/Utility/FileMode.hs
+++ b/Utility/FileMode.hs
@@ -177,7 +177,10 @@ writeFileProtected file content = writeFileProtected' file
(\h -> hPutStr h content)
writeFileProtected' :: FilePath -> (Handle -> IO ()) -> IO ()
-writeFileProtected' file writer = withUmask 0o0077 $
+writeFileProtected' file writer = protectedOutput $
withFile file WriteMode $ \h -> do
void $ tryIO $ modifyFileMode file $ removeModes otherGroupModes
writer h
+
+protectedOutput :: IO a -> IO a
+protectedOutput = withUmask 0o0077
diff --git a/debian/control b/debian/control
index 7706d943f7..fa68c87fe7 100644
--- a/debian/control
+++ b/debian/control
@@ -116,6 +116,7 @@ Suggests:
magic-wormhole,
tahoe-lafs,
libnss-mdns,
+ uftp,
Description: manage files with git, without checking their contents into git
git-annex allows managing files with git, without checking the file
contents into git. While that may seem paradoxical, it is useful when
diff --git a/doc/git-annex-multicast.mdwn b/doc/git-annex-multicast.mdwn
new file mode 100644
index 0000000000..87b6310b74
--- /dev/null
+++ b/doc/git-annex-multicast.mdwn
@@ -0,0 +1,95 @@
+# NAME
+
+git-annex multicast - multicast file distribution
+
+# SYNOPSIS
+
+git annex multicast [options]
+
+# DESCRIPTION
+
+Multicast allows files to be broadcast to multiple receivers,
+typically on a single local network.
+
+The uftp program is used for multicast.
+<http://uftp-multicast.sourceforge.net/>
+
+# OPTIONS
+
+* `--gen-address`
+
+ Generates a multicast encryption key and stores a corresponding multicast
+ address to the git-annex branch.
+
+* `--send [file]`
+
+ Sends the specified files to any receivers whose multicast addresses
+ are stored in the git-annex branch.
+
+ When no files are specified, all annexed files in the current directory
+ and subdirectories are sent.
+
+ The [[git-annex-matching-options]] can be used to control which files to
+ send. For example:
+
+ git annex multicast send . --not --copies 2
+
+* `--receive`
+
+ Receives files from senders whose multicast addresses
+ are stored in the git-annex brach.
+
+ As each file is received, its filename is displayed. This is the filename
+ that the sender used; the local working tree may use a different name
+ for the file, or not contain a link to the file.
+
+ This command continues running, until it is interrupted by you pressing
+ ctrl-c.
+
+ Note that the configured annex.diskreserve is not honored by this
+ command, because `uftpd` receives the actual files, and can receive
+ any size file.
+
+* `--uftp-opt=option` `-Uoption`
+
+ Pass an option on to the uftp/uftpd command. May be specified multiple
+ times.
+
+ For example, to broadcast at 50 Mbps:
+
+ git annex multicast send -U-R -U50000
+
+# EXAMPLE
+
+Suppose a teacher wants to multicast files to students in a classroom.
+
+This assumes that the teacher and students have cloned a git-annex
+repository, and both can push changes to its git-annex branch,
+or otherwise push changes to each-other.
+
+First, the teacher runs `git annex multicast --gen-address; git annex sync`
+
+Next, students each run `git annex multicast --gen-address; git annex sync`
+
+Once all the students have generated addresses, the teacher runs
+`git annex sync` once more. (Now the students all have received the
+teacher's address, and the teacher has received all the student's addresses.)
+
+Next students each run `git annex multicast --receive`
+
+Finally, once the students are all listening (ahem), teacher runs
+`git annex multicast --send`
+
+# SEE ALSO
+
+[[git-annex]](1)
+
+uftp(1)
+
+uftpd(1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 0add5a5371..07b8b19e19 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -164,6 +164,12 @@ subdirectories).
See [[git-annex-undo]](1) for details.
+* `multicast`
+
+ Multicast file distribution.
+
+ See [[git-annex-multicast]](1) for details.
+
* `watch`
Watch for changes and autocommit.
diff --git a/doc/internals.mdwn b/doc/internals.mdwn
index 78d0c8d472..00b65d2d13 100644
--- a/doc/internals.mdwn
+++ b/doc/internals.mdwn
@@ -288,3 +288,7 @@ that should prevent merging.
Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s
+
+## `multicast.log`
+
+Records uftp public key fingerprints, for use by [[git-annex-multicast]].
diff --git a/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn b/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn
index fa240bdf5b..bd34770724 100644
--- a/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn
+++ b/doc/todo/multicast___34__broadcasting__34___of_content_on_local_net.mdwn
@@ -5,3 +5,6 @@ Although I haven't remembered that "hash thing" to perform the job, we looked ar
What do you think?
[[!meta name=yoh]]
+
+> [[done]]! I've only tested it with sender and receiver on the same
+> laptop, but it seems to work. --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index db868d1587..200ea30ae4 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -90,6 +90,7 @@ Extra-Source-Files:
doc/git-annex-migrate.mdwn
doc/git-annex-mirror.mdwn
doc/git-annex-move.mdwn
+ doc/git-annex-multicast.mdwn
doc/git-annex-numcopies.mdwn
doc/git-annex-p2p.mdwn
doc/git-annex-pre-commit.mdwn
@@ -318,7 +319,7 @@ Executable git-annex
stm (>= 2.3),
mtl (>= 2),
uuid (>= 1.2.6),
- process,
+ process (>= 1.4.2.0),
data-default,
case-insensitive,
random,
@@ -518,6 +519,7 @@ Executable git-annex
Annex.MakeRepo
Annex.MetaData
Annex.MetaData.StandardFields
+ Annex.Multicast
Annex.Notification
Annex.NumCopies
Annex.Path
@@ -732,6 +734,7 @@ Executable git-annex
Command.Migrate
Command.Mirror
Command.Move
+ Command.Multicast
Command.NotifyChanges
Command.NumCopies
Command.P2P
@@ -857,6 +860,7 @@ Executable git-annex
Logs.Location
Logs.MapLog
Logs.MetaData
+ Logs.Multicast
Logs.NumCopies
Logs.PreferredContent
Logs.PreferredContent.Raw