diff options
author | Fernando Farfan <ffarfan@gmail.com> | 2015-06-01 15:30:35 -0600 |
---|---|---|
committer | Fernando Farfan <ffarfan@gmail.com> | 2015-06-05 10:17:43 -0600 |
commit | f3ae3d0da7199bb214ea84700c47370682c200b8 (patch) | |
tree | 6937d7cfe3ca16c103f141de0987e1af50d5a31e /licenses/03-cleanup-cc-html-files.py | |
parent | 2bb72bd10b17d9eb0a001f0ad240dde0c2f26c6b (diff) |
Updated scripts to download CC license files
Updated the scripts that we use to download HTML (and related) files for
Creative Commons licenses.
[endlessm/eos-sdk#3111]
Diffstat (limited to 'licenses/03-cleanup-cc-html-files.py')
-rwxr-xr-x | licenses/03-cleanup-cc-html-files.py | 53 |
1 files changed, 32 insertions, 21 deletions
```diff
diff --git a/licenses/03-cleanup-cc-html-files.py b/licenses/03-cleanup-cc-html-files.py
index b67eeac..dabfb09 100755
--- a/licenses/03-cleanup-cc-html-files.py
+++ b/licenses/03-cleanup-cc-html-files.py
@@ -6,23 +6,34 @@
 import re
 import sys
 def main(argv):
-    source_dir = 'creativecommons/'
+    langs = ['C', 'ar', 'es', 'fr', 'pt_BR']
+
+    # Clean up Public domain license files
+    cleanup_legalcode_file('publicdomain/', 'CC0-1.0')
+    for lang in langs:
+        cleanup_deed_file('publicdomain/', 'CC0-1.0', lang)
+    print ''
 
-    licenses = [
+    # Clean up Creative Commons license files
+    cc_licenses = [
+        'CC-BY-2.0',
         'CC-BY-3.0',
         'CC-BY-4.0',
-        'CC-BY-SA-3.0',
-        'CC-BY-SA-4.0',
+        'CC-BY-NC-2.0',
+        'CC-BY-NC-3.0',
+        'CC-BY-NC-SA-2.0',
         'CC-BY-ND-2.0',
         'CC-BY-ND-3.0',
+        'CC-BY-SA-2.0',
+        'CC-BY-SA-2.5',
+        'CC-BY-SA-3.0',
+        'CC-BY-SA-4.0',
     ]
 
-    langs = ['C', 'ar', 'es', 'fr', 'pt_BR']
-
-    for license in licenses:
-        cleanup_legalcode_file(source_dir, license)
+    for license in cc_licenses:
+        cleanup_legalcode_file('creativecommons/', license)
         for lang in langs:
-            cleanup_deed_file(source_dir, license, lang)
+            cleanup_deed_file('creativecommons/', license, lang)
         print ''
 
 def cleanup_legalcode_file(src_dir, license):
@@ -42,11 +53,11 @@ def cleanup_legalcode_file(src_dir, license):
         script.extract()
 
     # Make attributes relative
-    rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/')
-    rewrite_attr(soup, 'img', 'src', '^/images/', '../images/')
-    rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/')
-    rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/')
-    rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/')
+    rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/')
+    rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/')
+    rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/')
+    rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/')
+    rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/')
     rewrite_attr(soup, 'a', 'href', '^creativecommons.org/', 'http://creativecommons.org/')
     rewrite_attr(soup, 'a', 'href', '^//creativecommons.org/', 'http://creativecommons.org/')
 
@@ -83,13 +94,13 @@ def cleanup_deed_file(src_dir, license, lang):
     # Make attributes relative
     rewrite_attr(soup, 'a', 'href', '.*legalcode$', '../legalcode/' + license + '-legalcode.html')
     rewrite_attr(soup, 'a', 'href', '^/choose/', 'http://creativecommons.org/choose/')
-    rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../images/')
-    rewrite_attr(soup, 'img', 'src', '^/images/', '../images/')
-    rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../includes/')
-    rewrite_attr(soup, 'link', 'href', '^/includes/', '../includes/')
-    rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../includes/')
-    rewrite_attr(soup, 'script', 'src', '^/includes/', '../includes/')
-    rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../includes/deed.js')
+    rewrite_attr(soup, 'img', 'src', '.*creativecommons.org/images/', '../../images/')
+    rewrite_attr(soup, 'img', 'src', '^/images/', '../../images/')
+    rewrite_attr(soup, 'link', 'href', '.*creativecommons.org/includes/', '../../includes/')
+    rewrite_attr(soup, 'link', 'href', '^/includes/', '../../includes/')
+    rewrite_attr(soup, 'script', 'src', '.*creativecommons.org/includes/', '../../includes/')
+    rewrite_attr(soup, 'script', 'src', '^/includes/', '../../includes/')
+    rewrite_attr(soup, 'script', 'src', '^//scraper.creativecommons.org/js/deed.js', '../../includes/deed.js')
 
     # Remove inline JS
     for script in soup.findAll('script'):
```