summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Fernando Farfan <ffarfan@gmail.com> 2015-06-01 15:30:35 -0600
committer: Fernando Farfan <ffarfan@gmail.com> 2015-06-05 10:17:43 -0600
commit: f3ae3d0da7199bb214ea84700c47370682c200b8 (patch)
tree: 6937d7cfe3ca16c103f141de0987e1af50d5a31e
parent: 2bb72bd10b17d9eb0a001f0ad240dde0c2f26c6b (diff)
Updated scripts to download CC license files
Updated the scripts that we use to download HTML (and related) files for Creative Commons licenses. [endlessm/eos-sdk#3111]
-rwxr-xr-x licenses/01-download-cc-licenses.sh 17
-rwxr-xr-x licenses/02-organize-cc-files.py 93
-rwxr-xr-x licenses/03-cleanup-cc-html-files.py 53
3 files changed, 116 insertions, 47 deletions
diff --git a/licenses/01-download-cc-licenses.sh b/licenses/01-download-cc-licenses.sh
index 88e05e7..1cf0bb8 100755
--- a/licenses/01-download-cc-licenses.sh
+++ b/licenses/01-download-cc-licenses.sh
@@ -1,6 +1,19 @@
#!/bin/bash
-licenses=("by/3.0" "by/4.0" "by-sa/3.0" "by-sa/4.0" "by-nd/2.0" "by-nd/3.0")
+licenses=("publicdomain/zero/1.0/" \
+ "licenses/by/2.0" \
+ "licenses/by/3.0" \
+ "licenses/by/4.0" \
+ "licenses/by-nc/2.0" \
+ "licenses/by-nc/3.0" \
+ "licenses/by-nc-sa/2.0" \
+ "licenses/by-nd/2.0" \
+ "licenses/by-nd/3.0" \
+ "licenses/by-sa/2.0" \
+ "licenses/by-sa/2.5" \
+ "licenses/by-sa/3.0" \
+ "licenses/by-sa/4.0")
+
files=("legalcode" "deed.ar" "deed.en" "deed.es" "deed.fr" "deed.pt_BR")
# Remove log file if exists
@@ -13,6 +26,6 @@ do
for file in "${files[@]}"
do
echo " " $file
- wget --recursive --level=1 --adjust-extension --page-requisites --no-clobber --no-verbose https://creativecommons.org/licenses/$license/$file --append-output=wget-cc.log
+ wget --recursive --level=1 --adjust-extension --page-requisites --no-clobber --no-verbose https://creativecommons.org/$license/$file --append-output=wget-cc.log
done
done
diff --git a/licenses/02-organize-cc-files.py b/licenses/02-organize-cc-files.py
index 457b9a6..54a90a0 100755
--- a/licenses/02-organize-cc-files.py
+++ b/licenses/02-organize-cc-files.py
@@ -6,34 +6,86 @@ import sys
def main(argv):
source_dir = 'creativecommons.org/'
- target_dir = 'creativecommons/'
- copy_license_files(source_dir + 'licenses/', target_dir)
- copy_requisite_files(source_dir, target_dir)
+ # Organize Public domain license files
+ publicdomain_licenses = [
+ {
+ 'name': 'zero/1.0',
+ 'code': 'CC0-1.0'
+ }
+ ]
+ copy_license_files(source_dir + 'publicdomain/', 'publicdomain/', publicdomain_licenses)
+
+ # Organize Creative Commons license files
+ cc_licenses = [
+ {
+ 'name': 'by/2.0',
+ 'code': 'CC-BY-2.0'
+ },
+ {
+ 'name': 'by/3.0',
+ 'code': 'CC-BY-3.0'
+ },
+ {
+ 'name': 'by/4.0',
+ 'code': 'CC-BY-4.0'
+ },
+ {
+ 'name': 'by-nc/2.0',
+ 'code': 'CC-BY-NC-2.0'
+ },
+ {
+ 'name': 'by-nc/3.0',
+ 'code': 'CC-BY-NC-3.0'
+ },
+ {
+ 'name': 'by-nc-sa/2.0',
+ 'code': 'CC-BY-NC-SA-2.0'
+ },
+ {
+ 'name': 'by-nd/2.0',
+ 'code': 'CC-BY-ND-2.0'
+ },
+ {
+ 'name': 'by-nd/3.0',
+ 'code': 'CC-BY-ND-3.0'
+ },
+ {
+ 'name': 'by-sa/2.0',
+ 'code': 'CC-BY-SA-2.0'
+ },
+ {
+ 'name': 'by-sa/2.5',
+ 'code': 'CC-BY-SA-2.5'
+ },
+ {
+ 'name': 'by-sa/3.0',
+ 'code': 'CC-BY-SA-3.0'
+ },
+ {
+ 'name': 'by-sa/4.0',
+ 'code': 'CC-BY-SA-4.0'
+ },
+ ]
+ copy_license_files(source_dir + 'licenses/', 'creativecommons/', cc_licenses)
-def copy_license_files(source_dir, target_dir):
+ copy_requisite_files(source_dir, './')
- licenses = [
- 'by/3.0',
- 'by/4.0',
- 'by-sa/3.0',
- 'by-sa/4.0',
- 'by-nd/2.0',
- 'by-nd/3.0',
- ]
+def copy_license_files(source_dir, target_dir, licenses):
langs = ['ar', 'en', 'es', 'fr', 'pt_BR']
ensure_target_dirs_exist(target_dir, langs)
for license in licenses:
- license_code = get_code_for_license(license)
+ license_name = license['name']
+ license_code = license['code']
- source_license_dir = source_dir + license + '/'
+ source_license_dir = source_dir + license_name + '/'
target_legalcode_dir = target_dir + 'legalcode/'
- # Move legalcode file for license
+ # Move legalcode file for license_name
source_legalcode_path = source_license_dir + 'legalcode.html'
target_legalcode_path = target_legalcode_dir + license_code + '-legalcode.html'
print 'cp ' + source_legalcode_path + ' ' + target_legalcode_path
@@ -45,7 +97,7 @@ def copy_license_files(source_dir, target_dir):
source_deed_path = source_license_dir + 'deed.' + lang + '.html'
target_deed_path = target_lang_dir + license_code + '.html'
- # Move deed file for license/language
+ # Move deed file for license_name/language
print 'cp ' + source_deed_path + ' ' + target_deed_path
shutil.copy(source_deed_path, target_deed_path)
@@ -57,11 +109,7 @@ def copy_license_files(source_dir, target_dir):
def copy_requisite_files(source_dir, target_dir):
for req in ['images/', 'includes/']:
print 'cp ' + source_dir + req + ' ' + target_dir + req
- try:
- shutil.rmtree(source_dir + req, ignore_errors=True)
- except IOError:
- pass
- shutil.copytree(target_dir + req, source_dir + req)
+ shutil.copytree(source_dir + req, target_dir + req)
def ensure_target_dirs_exist(target_dir, langs):
# Ensure target directory exists
@@ -85,8 +133,5 @@ def ensure_target_dirs_exist(target_dir, langs):
except OSError:
pass
-def get_code_for_license(license):
- return 'CC-' + license.upper().replace('/', '-')
-
if __name__ == '__main__':
main(sys.argv[1:])
diff --git a/licenses/03-cleanup-cc-html-files.py b/licenses/03-cleanup-cc-html-files.py
index b67eeac..dabfb09 100755
--- a/licenses/03-cleanup-cc-html-files.py
+++ b/licenses/03-cleanup-cc-html-files.py
@@ -6,23 +6,34 @@ import re
import sys
def main(argv):
- source_dir = 'creativecommons/'
+ langs = ['C', 'ar', 'es', 'fr', 'pt_BR']
+
+ # Clean up Public domain license files
+ cleanup_legalcode_file('publicdomain/', 'CC0-1.0')
+ for lang in langs:
+ cleanup_deed_file('publicdomain/', 'CC0-1.0', lang)
+ print ''
- licenses = [
+ # Clean up Creative Commons license files
+ cc_licenses = [
+ 'CC-BY-2.0',
'CC-BY-3.0',
'CC-BY-4.0',
- 'CC-BY-SA-3.0',
- 'CC-BY-SA-4.0',
+ 'CC-BY-NC-2.0',
+ 'CC-BY-NC-3.0',
+ 'CC-BY-NC-SA-2.0',
'CC-BY-ND-2.0',
'CC-BY-ND-3.0',
+ 'CC-BY-SA-2.0',
+ 'CC-BY-SA-2.5',
+ 'CC-BY-SA-3.0',
+ 'CC-BY-SA-4.0',
]
- langs = ['C', 'ar', 'es', 'fr', 'pt_BR']
-
- for license in licenses:
- cleanup_legalcode_file(source_dir, license)
+ for license in cc_licenses:
+ cleanup_legalcode_file('creativecommons/', license)
for lang in langs:
- cleanup_deed_file(source_dir, license, lang)
+ cleanup_deed_file('creativecommons/', license, lang)
print ''
def cleanup_legalcode_file(src_dir, license):
@@ -42,11 +53,11 @@ def cleanup_legalcode_file(src_dir, license):
script.extract()
# Make attributes relative
- rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/')
- rewrite_attr(soup, 'img', 'src', '^/images/', '../images/')
- rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/')
- rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/')
- rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/')
+ rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/')
+ rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/')
+ rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/')
+ rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/')
+ rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/')
rewrite_attr(soup, 'a', 'href', '^creativecommons.org/', 'http://creativecommons.org/')
rewrite_attr(soup, 'a', 'href', '^//creativecommons.org/', 'http://creativecommons.org/')
@@ -83,13 +94,13 @@ def cleanup_deed_file(src_dir, license, lang):
# Make attributes relative
rewrite_attr(soup, 'a', 'href', '.*legalcode$', '../legalcode/' + license + '-legalcode.html')
rewrite_attr(soup, 'a', 'href', '^/choose/', 'http://creativecommons.org/choose/')
- rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/')
- rewrite_attr(soup, 'img', 'src', '^/images/', '../images/')
- rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/')
- rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/')
- rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/')
- rewrite_attr(soup, 'script', 'src', '^/includes/', '../includes/')
- rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../includes/deed.js')
+ rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/')
+ rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/')
+ rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/')
+ rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/')
+ rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/')
+ rewrite_attr(soup, 'script', 'src', '^/includes/', '../../includes/')
+ rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../../includes/deed.js')
# Remove inline JS
for script in soup.findAll('script'):