From f3ae3d0da7199bb214ea84700c47370682c200b8 Mon Sep 17 00:00:00 2001 From: Fernando Farfan Date: Mon, 1 Jun 2015 15:30:35 -0600 Subject: Updated scripts to download CC license files Updated the scripts that we use to download HTML (and related) files for Creative Commons licenses. [endlessm/eos-sdk#3111] --- licenses/03-cleanup-cc-html-files.py | 53 ++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 21 deletions(-) (limited to 'licenses/03-cleanup-cc-html-files.py') diff --git a/licenses/03-cleanup-cc-html-files.py b/licenses/03-cleanup-cc-html-files.py index b67eeac..dabfb09 100755 --- a/licenses/03-cleanup-cc-html-files.py +++ b/licenses/03-cleanup-cc-html-files.py @@ -6,23 +6,34 @@ import re import sys def main(argv): - source_dir = 'creativecommons/' + langs = ['C', 'ar', 'es', 'fr', 'pt_BR'] + + # Clean up Public domain license files + cleanup_legalcode_file('publicdomain/', 'CC0-1.0') + for lang in langs: + cleanup_deed_file('publicdomain/', 'CC0-1.0', lang) + print '' - licenses = [ + # Clean up Creative Commons license files + cc_licenses = [ + 'CC-BY-2.0', 'CC-BY-3.0', 'CC-BY-4.0', - 'CC-BY-SA-3.0', - 'CC-BY-SA-4.0', + 'CC-BY-NC-2.0', + 'CC-BY-NC-3.0', + 'CC-BY-NC-SA-2.0', 'CC-BY-ND-2.0', 'CC-BY-ND-3.0', + 'CC-BY-SA-2.0', + 'CC-BY-SA-2.5', + 'CC-BY-SA-3.0', + 'CC-BY-SA-4.0', ] - langs = ['C', 'ar', 'es', 'fr', 'pt_BR'] - - for license in licenses: - cleanup_legalcode_file(source_dir, license) + for license in cc_licenses: + cleanup_legalcode_file('creativecommons/', license) for lang in langs: - cleanup_deed_file(source_dir, license, lang) + cleanup_deed_file('creativecommons/', license, lang) print '' def cleanup_legalcode_file(src_dir, license): @@ -42,11 +53,11 @@ def cleanup_legalcode_file(src_dir, license): script.extract() # Make attributes relative - rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/') - rewrite_attr(soup, 'img', 'src', '^/images/', '../images/') - rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/') + rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/') + rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/') + rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/') rewrite_attr(soup, 'a', 'href', '^creativecommons.org/', 'http://creativecommons.org/') rewrite_attr(soup, 'a', 'href', '^//creativecommons.org/', 'http://creativecommons.org/') @@ -83,13 +94,13 @@ def cleanup_deed_file(src_dir, license, lang): # Make attributes relative rewrite_attr(soup, 'a', 'href', '.*legalcode$', '../legalcode/' + license + '-legalcode.html') rewrite_attr(soup, 'a', 'href', '^/choose/', 'http://creativecommons.org/choose/') - rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/') - rewrite_attr(soup, 'img', 'src', '^/images/', '../images/') - rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '^/includes/', '../includes/') - rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../includes/deed.js') + rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/') + rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/') + rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '^/includes/', '../../includes/') + rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../../includes/deed.js') # Remove inline JS for script in soup.findAll('script'): -- cgit v1.2.3