diff options
author | Fernando Farfan <ffarfan@gmail.com> | 2015-06-01 15:30:35 -0600 |
---|---|---|
committer | Fernando Farfan <ffarfan@gmail.com> | 2015-06-05 10:17:43 -0600 |
commit | f3ae3d0da7199bb214ea84700c47370682c200b8 (patch) | |
tree | 6937d7cfe3ca16c103f141de0987e1af50d5a31e | |
parent | 2bb72bd10b17d9eb0a001f0ad240dde0c2f26c6b (diff) |
Updated scripts to download CC license files
Updated the scripts that we use to download HTML (and related) files for
Creative Commons licenses.
[endlessm/eos-sdk#3111]
-rwxr-xr-x | licenses/01-download-cc-licenses.sh | 17 | ||||
-rwxr-xr-x | licenses/02-organize-cc-files.py | 93 | ||||
-rwxr-xr-x | licenses/03-cleanup-cc-html-files.py | 53 |
3 files changed, 116 insertions, 47 deletions
diff --git a/licenses/01-download-cc-licenses.sh b/licenses/01-download-cc-licenses.sh index 88e05e7..1cf0bb8 100755 --- a/licenses/01-download-cc-licenses.sh +++ b/licenses/01-download-cc-licenses.sh @@ -1,6 +1,19 @@ #!/bin/bash -licenses=("by/3.0" "by/4.0" "by-sa/3.0" "by-sa/4.0" "by-nd/2.0" "by-nd/3.0") +licenses=("publicdomain/zero/1.0/" \ + "licenses/by/2.0" \ + "licenses/by/3.0" \ + "licenses/by/4.0" \ + "licenses/by-nc/2.0" \ + "licenses/by-nc/3.0" \ + "licenses/by-nc-sa/2.0" \ + "licenses/by-nd/2.0" \ + "licenses/by-nd/3.0" \ + "licenses/by-sa/2.0" \ + "licenses/by-sa/2.5" \ + "licenses/by-sa/3.0" \ + "licenses/by-sa/4.0") + files=("legalcode" "deed.ar" "deed.en" "deed.es" "deed.fr" "deed.pt_BR") # Remove log file if exists @@ -13,6 +26,6 @@ do for file in "${files[@]}" do echo " " $file - wget --recursive --level=1 --adjust-extension --page-requisites --no-clobber --no-verbose https://creativecommons.org/licenses/$license/$file --append-output=wget-cc.log + wget --recursive --level=1 --adjust-extension --page-requisites --no-clobber --no-verbose https://creativecommons.org/$license/$file --append-output=wget-cc.log done done diff --git a/licenses/02-organize-cc-files.py b/licenses/02-organize-cc-files.py index 457b9a6..54a90a0 100755 --- a/licenses/02-organize-cc-files.py +++ b/licenses/02-organize-cc-files.py @@ -6,34 +6,86 @@ import sys def main(argv): source_dir = 'creativecommons.org/' - target_dir = 'creativecommons/' - copy_license_files(source_dir + 'licenses/', target_dir) - copy_requisite_files(source_dir, target_dir) + # Organize Public domain license files + publicdomain_licenses = [ + { + 'name': 'zero/1.0', + 'code': 'CC0-1.0' + } + ] + copy_license_files(source_dir + 'publicdomain/', 'publicdomain/', publicdomain_licenses) + + # Organize Creative Commons license files + cc_licenses = [ + { + 'name': 'by/2.0', + 'code': 'CC-BY-2.0' + }, + { + 'name': 'by/3.0', + 'code': 'CC-BY-3.0' + }, + { + 'name': 'by/4.0', + 'code': 'CC-BY-4.0' + }, + { + 'name': 'by-nc/2.0', + 'code': 'CC-BY-NC-2.0' + }, + { + 'name': 'by-nc/3.0', + 'code': 'CC-BY-NC-3.0' + }, + { + 'name': 'by-nc-sa/2.0', + 'code': 'CC-BY-NC-SA-2.0' + }, + { + 'name': 'by-nd/2.0', + 'code': 'CC-BY-ND-2.0' + }, + { + 'name': 'by-nd/3.0', + 'code': 'CC-BY-ND-3.0' + }, + { + 'name': 'by-sa/2.0', + 'code': 'CC-BY-SA-2.0' + }, + { + 'name': 'by-sa/2.5', + 'code': 'CC-BY-SA-2.5' + }, + { + 'name': 'by-sa/3.0', + 'code': 'CC-BY-SA-3.0' + }, + { + 'name': 'by-sa/4.0', + 'code': 'CC-BY-SA-4.0' + }, + ] + copy_license_files(source_dir + 'licenses/', 'creativecommons/', cc_licenses) -def copy_license_files(source_dir, target_dir): + copy_requisite_files(source_dir, './') - licenses = [ - 'by/3.0', - 'by/4.0', - 'by-sa/3.0', - 'by-sa/4.0', - 'by-nd/2.0', - 'by-nd/3.0', - ] +def copy_license_files(source_dir, target_dir, licenses): langs = ['ar', 'en', 'es', 'fr', 'pt_BR'] ensure_target_dirs_exist(target_dir, langs) for license in licenses: - license_code = get_code_for_license(license) + license_name = license['name'] + license_code = license['code'] - source_license_dir = source_dir + license + '/' + source_license_dir = source_dir + license_name + '/' target_legalcode_dir = target_dir + 'legalcode/' - # Move legalcode file for license + # Move legalcode file for license_name source_legalcode_path = source_license_dir + 'legalcode.html' target_legalcode_path = target_legalcode_dir + license_code + '-legalcode.html' print 'cp ' + source_legalcode_path + ' ' + target_legalcode_path @@ -45,7 +97,7 @@ def copy_license_files(source_dir, target_dir): source_deed_path = source_license_dir + 'deed.' + lang + '.html' target_deed_path = target_lang_dir + license_code + '.html' - # Move deed file for license/language + # Move deed file for license_name/language print 'cp ' + source_deed_path + ' ' + target_deed_path shutil.copy(source_deed_path, target_deed_path) @@ -57,11 +109,7 @@ def copy_license_files(source_dir, target_dir): def copy_requisite_files(source_dir, target_dir): for req in ['images/', 'includes/']: print 'cp ' + source_dir + req + ' ' + target_dir + req - try: - shutil.rmtree(source_dir + req, ignore_errors=True) - except IOError: - pass - shutil.copytree(target_dir + req, source_dir + req) + shutil.copytree(source_dir + req, target_dir + req) def ensure_target_dirs_exist(target_dir, langs): # Ensure target directory exists @@ -85,8 +133,5 @@ def ensure_target_dirs_exist(target_dir, langs): except OSError: pass -def get_code_for_license(license): - return 'CC-' + license.upper().replace('/', '-') - if __name__ == '__main__': main(sys.argv[1:]) diff --git a/licenses/03-cleanup-cc-html-files.py b/licenses/03-cleanup-cc-html-files.py index b67eeac..dabfb09 100755 --- a/licenses/03-cleanup-cc-html-files.py +++ b/licenses/03-cleanup-cc-html-files.py @@ -6,23 +6,34 @@ import re import sys def main(argv): - source_dir = 'creativecommons/' + langs = ['C', 'ar', 'es', 'fr', 'pt_BR'] + + # Clean up Public domain license files + cleanup_legalcode_file('publicdomain/', 'CC0-1.0') + for lang in langs: + cleanup_deed_file('publicdomain/', 'CC0-1.0', lang) + print '' - licenses = [ + # Clean up Creative Commons license files + cc_licenses = [ + 'CC-BY-2.0', 'CC-BY-3.0', 'CC-BY-4.0', - 'CC-BY-SA-3.0', - 'CC-BY-SA-4.0', + 'CC-BY-NC-2.0', + 'CC-BY-NC-3.0', + 'CC-BY-NC-SA-2.0', 'CC-BY-ND-2.0', 'CC-BY-ND-3.0', + 'CC-BY-SA-2.0', + 'CC-BY-SA-2.5', + 'CC-BY-SA-3.0', + 'CC-BY-SA-4.0', ] - langs = ['C', 'ar', 'es', 'fr', 'pt_BR'] - - for license in licenses: - cleanup_legalcode_file(source_dir, license) + for license in cc_licenses: + cleanup_legalcode_file('creativecommons/', license) for lang in langs: - cleanup_deed_file(source_dir, license, lang) + cleanup_deed_file('creativecommons/', license, lang) print '' def cleanup_legalcode_file(src_dir, license): @@ -42,11 +53,11 @@ def cleanup_legalcode_file(src_dir, license): script.extract() # Make attributes relative - rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/') - rewrite_attr(soup, 'img', 'src', '^/images/', '../images/') - rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/') + rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/') + rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/') + rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/') rewrite_attr(soup, 'a', 'href', '^creativecommons.org/', 'http://creativecommons.org/') rewrite_attr(soup, 'a', 'href', '^//creativecommons.org/', 'http://creativecommons.org/') @@ -83,13 +94,13 @@ def cleanup_deed_file(src_dir, license, lang): # Make attributes relative rewrite_attr(soup, 'a', 'href', '.*legalcode$', '../legalcode/' + license + '-legalcode.html') rewrite_attr(soup, 'a', 'href', '^/choose/', 'http://creativecommons.org/choose/') - rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/') - rewrite_attr(soup, 'img', 'src', '^/images/', '../images/') - rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../includes/deed.js') + rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/') + rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/') + rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../../includes/deed.js') # Remove inline JS for script in soup.findAll('script'): |