summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVasudev Kamath <kamathvasudev@gmail.com>2015-10-24 18:06:08 +0530
committerVasudev Kamath <kamathvasudev@gmail.com>2015-10-24 18:06:08 +0530
commite367e141ed8f00c307fa55c748e588021f8e59d6 (patch)
tree11791afa6b55637dea148214a77597fadecba2b6
parent8b1619d1a1cdb9a2da65a3f2bbf823cb96bca08b (diff)
Add rules file and needed script.
-rwxr-xr-xdebian/license-miner242
-rwxr-xr-xdebian/rules48
2 files changed, 290 insertions, 0 deletions
diff --git a/debian/license-miner b/debian/license-miner
new file mode 100755
index 0000000..596281a
--- /dev/null
+++ b/debian/license-miner
@@ -0,0 +1,242 @@
+#! /usr/bin/perl
+
+use autodie;
+use strict;
+use utf8;
+use warnings qw(all);
+use feature 'say';
+
+use Getopt::Long;
+use Pod::Usage;
+use FileHandle;
+use Regexp::Assemble;
+# Disable ExifTool's custom configuration file. ExifTool's documentation
+# sets this variable in a BEGIN block placed before the module is loaded,
+# so the override has to precede the "use" line.
+BEGIN { $Image::ExifTool::configFile = '' }
+use Image::ExifTool;
+use Font::TTF::Font;
+use Font::TTF::Ttc;
+
+=head1 NAME
+
+license-miner - extract copyright/licensing info from complex files
+
+=head1 SYNOPSIS
+
+license-miner [B<options>] [F<path>|inspector:F<path>...]
+
+=head1 OPTIONS
+
+=over 12
+
+=item B<--help>
+
+Prints a brief help message and exits.
+
+=item B<--man>
+
+Prints the manual page and exits.
+
+=item B<--verbose>
+
+Prints names of paths and the inspector used.
+
+=item B<--debug>
+
+Prints extracted info.
+
+=back
+
+=head1 DESCRIPTION
+
+B<This program> will inspect files,
+extract their copyright and licensing info,
+and save the result next to the files
+(adding suffix "F<.metadata_dump>").
+
+File paths are provided either as arguments
+or (if no arguments provided) from STDIN.
+
+Each path may optionally be prefixed with an inspector to use.
+Default is to pick inspector based on file suffix.
+
+=head1 INSPECTORS
+
+Available inspectors are B<ttf>, B<ttc> and B<exif>.
+
+=over 12
+
+=item B<ttf>
+
+TrueType fonts (including Truetype-flavored OpenType and WOFF).
+
+Used by default for extensions F<.ttf>, F<.otf>, F<.woff>.
+
+Beware that some OpenType fonts are not TrueType but Type1,
+which may fail to parse correctly based on suffix detection.
+If that happens, try force using the exif inspector
+by prefixing the path with "exif:".
+
+=item B<ttc>
+
+TrueType collections (including Truetype-flavored OpenType).
+
+Used by default for extension F<.ttc>.
+
+If parsing fails, try force using the exif inspector
+by prefixing the path with "exif:".
+
+=item B<exif>
+
+misc. images and fonts.
+
+Used by default for extensions F<.pdf>, F<.png>, F<.jpg>, F<.jpeg>, F<.gif>, F<.icc>.
+
+This inspector can also read OpenType fonts that are not TrueType but Type1,
+which the ttf inspector may fail to parse when picked by suffix detection.
+To use it for such fonts, force the exif inspector
+by prefixing the path with "exif:".
+
+=back
+
+=cut
+
+# Keep ExifTool from loading any custom configuration file.
+# NOTE(review): ExifTool's documentation places this override before
+# "use Image::ExifTool" -- confirm it still takes effect this late.
+BEGIN { $Image::ExifTool::configFile = '' }
+
+# Command-line flags; $verbose and $debug are consulted by the inspectors.
+my ( $help, $man, $verbose, $debug );
+GetOptions(
+    'help'    => \$help,
+    'man'     => \$man,
+    'verbose' => \$verbose,
+    'debug'   => \$debug,
+) or pod2usage(2);
+if ($help) { pod2usage( -verbose => 1 ) }
+if ($man)  { pod2usage( -verbose => 2, -exitstatus => 0 ) }
+
+# Without arguments, paths must be piped in: bail out on an interactive STDIN.
+if ( !@ARGV and -t STDIN ) { pod2usage("$0: No paths provided.") }
+
+# Inspectors, keyed by the regex selecting them. Capture 1 of each regex is
+# the path to inspect: either what follows an explicit "inspector:" prefix
+# or a whole path with a known suffix. Each handler writes "label: value"
+# lines to <path>.metadata_dump, or to STDOUT when --debug is given.
+my $dispatch = {
+    # TrueType (including Truetype-flavored OpenType and WOFF) fonts
+    '((?<=\Attf:).*|\A.*\.(?:ttf|otf|woff))$' => sub {
+        my $file = check_infile(shift);
+        say "ttf: $file" if ($verbose);
+        my $handle = ($debug)
+            ? *STDOUT{IO}
+            : FileHandle->new( check_outfile($file), 'w' );
+        # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
+        my %table = (
+            Copyright     => 0,
+            Trademark     => 7,
+            License       => 13,
+            'License URL' => 14,
+        );
+        my $font = Font::TTF::Font->open($file) or do {
+            say STDERR "ERROR: Failed to parse file as TrueType font: $file";
+            exit 1;
+        };
+        my $fn = $font->{'name'}->read;
+        foreach my $label (sort keys %table) {
+            my $value = $fn->find_name($table{$label});
+            print $handle "$label: $value\n" if ($value);
+        }
+    },
+    # TrueType (including Truetype-flavored OpenType) collections
+    '((?<=\Attc:).*|\A.*\.(?:ttc))$' => sub {
+        my $file = check_infile(shift);
+        say "ttc: $file" if ($verbose);
+        my $handle = ($debug)
+            ? *STDOUT{IO}
+            : FileHandle->new( check_outfile($file), 'w' );
+        # source: http://scripts.sil.org/IWS-Chapter08#3054f18b
+        my %table = (
+            Copyright     => 0,
+            Trademark     => 7,
+            License       => 13,
+            'License URL' => 14,
+        );
+        my $collection = Font::TTF::Ttc->open($file) or do {
+            say STDERR "ERROR: Failed to parse file as TrueType collection: $file";
+            exit 1;
+        };
+        # emit the same name records for every entry listed in 'directs'
+        foreach my $member ( @{$collection->{'directs'}} ) {
+            my $fn = $member->{'name'}->read;
+            foreach my $label (sort keys %table) {
+                my $value = $fn->find_name($table{$label});
+                print $handle "$label: $value\n" if ($value);
+            }
+        }
+    },
+    # exif: misc. images and fonts
+    '((?<=\Aexif:).*|\A.*\.(?:pdf|png|jpg|jpeg|gif|icc))$' => sub {
+        my $file = check_infile(shift);
+        say "exif: $file" if ($verbose);
+        my $exifTool = Image::ExifTool->new;
+        my $handle = ($debug)
+            ? *STDOUT{IO}
+            : FileHandle->new( check_outfile($file), 'w' );
+        my $info = $exifTool->ImageInfo($file,
+            # tags to lookup (like `exiftool $file` in shell)
+            '*Copyright*', '*Licens*', '*Trademark*');
+        foreach my $tag (sort keys %$info) {
+            my $tagdesc = $exifTool->GetDescription($tag);
+            print $handle "$tagdesc: $info->{$tag}\n";
+        }
+    }
+};
+
+my $re = Regexp::Assemble->new( track => 1 )->add( keys %$dispatch );
+
+# Take paths from the arguments, else from STDIN. ("<>" would open each
+# argument and read its content as the path list instead.) The current
+# string stays in $_ for the duration of each handler call.
+foreach ( @ARGV ? @ARGV : <STDIN> ) {
+    chomp;
+    unless ( $re->match($_) ) {
+        say STDERR "ERROR: Unsupported or unparseable string: $_";
+        say STDERR " maybe you need a prefix (e.g. \"exif:fonts/SomeType1Font\")";
+        exit 1;
+    }
+    # hand the first capture (path without inspector prefix) to the handler
+    $dispatch->{ $re->matched }( $re->mvar(1) );
+}
+
+# Return the given path unchanged when something exists there;
+# otherwise report the missing file on STDERR and exit with status 1.
+sub check_infile {
+    my ($path) = @_;
+    return $path if -e $path;
+    say STDERR "ERROR: file does not exist: $path";
+    exit 1;
+}
+
+# Derive the dump file name for an inspected file (".metadata_dump" is
+# appended) and return it; exit with status 1 if that file exists already.
+sub check_outfile {
+    my ($source) = @_;
+    my $dump = "$source.metadata_dump";
+    return $dump unless -e $dump;
+    say STDERR "ERROR: dumpfile exist: $dump";
+    say STDERR " remove or put aside and try again";
+    exit 1;
+}
+
+=head1 AUTHOR
+
+Jonas Smedegaard, C<< <dr@jones.dk> >>
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright 2014-2015 Jonas Smedegaard
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program. If not, see <http://www.gnu.org/licenses/>.
+
+=cut
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..cd9f745
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,48 @@
+#!/usr/bin/make -f
+
+# -*- mode: makefile; coding: utf-8 -*-
+# Copyright © 2013, 2015 Jonas Smedegaard <dr@jones.dk>
+# Copyright © 2013-2015 Vasudev Kamath <kamathvasudev@gmail.com>
+# Description: Main Debian packaging script for pugixml
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# This needs to be declared/run before including CDBS snippets
+
+include /usr/share/cdbs/1/rules/upstream-tarball.mk
+include /usr/share/cdbs/1/rules/utils.mk
+include /usr/share/cdbs/1/rules/debhelper.mk
+
+# Uncomment below lines once upstream tags its releases.
+# DEB_UPSTREAM_URL = https://github.com/zeux/pugixml/archive
+# DEB_UPSTREAM_TARBALL_BASENAME = v$(DEB_UPSTREAM_TARBALL_VERSION)
+
+# extract metadata from images before copyright check
+# (debian/license-miner needs the Perl modules pulled in below)
+CDBS_BUILD_DEPENDS +=, libregexp-assemble-perl, libimage-exiftool-perl
+CDBS_BUILD_DEPENDS +=, libfont-ttf-perl
+# file suffixes whose metadata is dumped by debian/license-miner
+local_inspection_regex = ttf|ttc
+# skip the inspected files themselves, plus changelog and copyright files
+# (presumably read by the CDBS copyright check -- confirm in utils.mk)
+DEB_COPYRIGHT_CHECK_IGNORE_REGEX = ^((.*/)?[^/]+\.($(local_inspection_regex))|debian/(changelog|copyright(|_hints|_newhints)))$$
+
+# dump metadata before the copyright check runs: matching paths are passed
+# NUL-separated on stdin, and "perl -0" makes the script split input on NUL
+debian/stamp-copyright-check: debian/stamp-extract-copyright
+debian/stamp-extract-copyright:
+ find * -type f -regextype posix-extended \
+ -regex '.*\.($(local_inspection_regex))' \
+ -print0 | perl -0 debian/license-miner
+ touch $@
+# once the copyright check is done, drop the dumps again before building
+pre-build:: clean-extracted-copyright-during-build
+clean-extracted-copyright-during-build: debian/stamp-copyright-check
+ find -type f -name '*.metadata_dump' -delete
+# also remove dumps and the extraction stamp on clean
+clean::
+ find -type f -name '*.metadata_dump' -delete
+ rm -f debian/stamp-extract-copyright