#!/usr/bin/python

# Copyright (c) 2014, Jakub Wilk
# Copyright (c) 2014, Ximin Luo
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

"""Fetch an add-on's version history feed and write it out as a changelog.

The RSS feed is downloaded page by page from addons.mozilla.org and written
to an ``.html`` file; optionally an external program (lynx or pandoc) turns
that file into plain text, markdown or reStructuredText.
"""

from __future__ import print_function

import argparse
import io
import os
import re
import subprocess
import sys

try:  # Python 3
    from urllib.error import HTTPError
    from urllib.parse import unquote
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import HTTPError, unquote, urlopen

try:  # C accelerator; no longer available on Python 3.9+
    import xml.etree.cElementTree as etree
except ImportError:
    import xml.etree.ElementTree as etree

URL_TEMPLATE = "https://addons.mozilla.org/en-US/addon/{ext}/versions/format:rss"
OUTGOING_HREF = re.compile(
    r'href="https?://outgoing\.mozilla\.org/v\d+/\w+/(.+?)"')

# NOTE(review): these templates -- and the heading / line-break literals in
# convert_rss_to_html -- contain only newlines where HTML markup would be
# expected. The tags may have been lost before this file reached review;
# confirm against the upstream source.
HTML_HEAD = "\n{title}\n\n"
HTML_FOOT = "\n"

ATOM_LINK_TAG = "{http://www.w3.org/2005/Atom}link"


def _native(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 a ``unicode`` value is encoded to UTF-8 bytes so that it can
    be mixed with byte-string literals and written to a plain file; on
    Python 3 text is already ``str`` and is returned unchanged.
    """
    if isinstance(text, str):
        return text
    return text.encode("utf-8")


def _open_html_for_writing(path):
    """Open *path* for writing native ``str`` data, stored as UTF-8."""
    if sys.version_info[0] >= 3:
        return io.open(path, "w", encoding="utf-8")
    # Python 2: convert_rss_to_html hands over already-encoded byte strings.
    return open(path, "w")


def fix_outgoing_href(match):
    """Return an ``href`` attribute that points at the redirect's target.

    *match* is a match of ``OUTGOING_HREF``; its first group is the
    percent-encoded destination that followed the redirector prefix.
    """
    return 'href="%s"' % unquote(match.group(1))


def convert_rss_to_html(first, source, target):
    """Convert one page of the RSS feed and append the result to *target*.

    first  -- true for the first page; the feed title is then written using
              ``HTML_HEAD`` before any item.
    source -- file name or binary file object containing the RSS document.
    target -- writable text stream receiving the output.

    Returns the URL of the next page (the ``href`` of the Atom link with
    ``rel="next"``), or ``None`` on the last page, in which case
    ``HTML_FOOT`` is written as well.

    Raises ``StopIteration`` if *first* is true and the document has no
    ``title`` element.
    """
    elements = etree.iterparse(source)
    next_url = None

    # header if first page
    if first:
        element = next(elements)[1]
        while element.tag != "title":
            element = next(elements)[1]
        # Encode like every other piece of feed text; on Python 2 a
        # non-ASCII feed title would otherwise fail when printed.
        print(HTML_HEAD.format(title=_native(element.text or "")),
              file=target)

    # items, rel
    for _, element in elements:
        if element.tag == ATOM_LINK_TAG:
            # "rel" is optional on Atom links, so do not index attrib here.
            if element.get("rel") == "next":
                next_url = element.attrib["href"]
            continue
        if element.tag != "item":
            continue

        # An item without a title gets an empty heading instead of aborting
        # the whole conversion.
        title = _native(element.findtext("title", ""))
        print("\n\n%s\n\n" % title, file=target)

        description = element.findtext("description")
        if description:
            desc = _native(description.rstrip("\n"))
            # process manual line breaks, e.g. adblock-plus
            # NOTE(review): literals reproduced as received; see the note
            # next to HTML_HEAD.
            desc = desc.replace("\n", "\n\n").replace("\n\n", "\n \n")
            # strip outgoing redirect
            desc = OUTGOING_HREF.sub(fix_outgoing_href, desc)
            print(desc, file=target)
        else:
            print("[no description]", file=target)
        print("", file=target)

    # footer if last page
    if not next_url:
        print(HTML_FOOT, file=target)
    return next_url


def which(cmd):
    """Return the path of the executable *cmd* found on ``PATH``, or None.

    Directories are searched in ``PATH`` order (``os.defpath`` if ``PATH``
    is unset); directories that happen to be executable are skipped.
    """
    search_path = os.environ.get("PATH", os.defpath).split(os.pathsep)
    for directory in search_path:
        name = os.path.join(directory, cmd)
        # os.access() is false for missing files, so no separate
        # existence check is needed.
        if os.access(name, os.X_OK) and not os.path.isdir(name):
            return name
    return None


def try_external_write(out, args, **kwargs):
    """Run the external command *args*, which is expected to produce *out*.

    out    -- name of the output file; used in status messages only.
    args   -- command line as a list; ``args[0]`` must be found on ``PATH``.
    kwargs -- passed through to ``subprocess.check_call``.

    Returns True on success. On failure (program not found, non-zero exit
    status, or any other error while running it) a message is printed to
    stderr and False is returned.
    """
    prog = args[0]
    if not which(prog):
        print("failed to write %s: program not found: %s" % (out, prog),
              file=sys.stderr)
        return False
    try:
        subprocess.check_call(args, **kwargs)
    except Exception as e:
        print("failed to write %s: %s" % (out, e), file=sys.stderr)
        return False
    print("wrote %s" % out, file=sys.stderr)
    return True


def main():
    """Parse the command line, write the changelog, return an exit status.

    Returns 0 on success and 1 if the HTML file could not be written, the
    output name does not end in ``.html``, or the requested plain-format
    conversion failed.
    """
    parser = argparse.ArgumentParser(
        description="Fetch Version History of an addon from the Mozilla "
        "Extensions website and convert it into a human-readable format.")
    parser.add_argument(
        "extension",
        help="Extension short-name, as used on addons.mozilla.org.")
    parser.add_argument(
        "-f", "--html-file", metavar="FILE",
        default="debian/upstream/changelog.html",
        help="File to write to. Default: %(default)s.")
    parser.add_argument(
        "-p", "--plain-format", metavar="FORMAT",
        choices=["text", "markdown", "rst"], default="none",
        help="Generate a human-readable form of the changelog in the file "
        "without the .html extension, using an external program. Possible "
        "options are text (uses lynx(1)), markdown (pandoc(1)), or rst "
        "(pandoc(1)). \nDefault: %(default)s.")
    options = parser.parse_args()

    progname = os.path.basename(sys.argv[0])
    html_file = options.html_file
    if not html_file.endswith(".html"):
        print("%s: Output filename must end with .html: %s"
              % (progname, html_file), file=sys.stderr)
        return 1
    plain_file = html_file[:-5]

    try:
        with _open_html_for_writing(html_file) as target:
            url = URL_TEMPLATE.format(ext=options.extension)
            first = True
            # Follow the feed's "next" links until the last page.
            while url:
                try:
                    source = urlopen(url)
                except HTTPError as error:
                    print("%s: For extension '%s', error fetching '%s': %s"
                          % (progname, options.extension, url, error),
                          file=sys.stderr)
                    raise
                try:
                    url = convert_rss_to_html(first, source, target)
                    first = False
                finally:
                    source.close()
        print("wrote %s" % html_file, file=sys.stderr)
    except Exception as e:
        # Top-level boundary: report and fail. The partially written file
        # is left in place.
        print("failed to write %s: %s" % (html_file, e), file=sys.stderr)
        return 1

    if options.plain_format == "text":
        with open(plain_file, "w") as target:
            written = try_external_write(
                plain_file,
                ["lynx", "-dump", "-list_inline", "-width=84", html_file],
                stdout=target)
        if not written:
            return 1
        # 2 space indent is a bit more reasonable than lynx's 3 default
        # width=84 above (3*2-2) effectively cancels the right margin
        # Run only after the file is closed, since sed -i replaces it.
        subprocess.call(["sed", "-i", "-e", "s/^   /  /g", plain_file])
    elif options.plain_format == "markdown":
        if not try_external_write(
                plain_file,
                ["pandoc", "-i", html_file, "--columns=79", "-wmarkdown",
                 "-o", plain_file]):
            return 1
    elif options.plain_format == "rst":
        if not try_external_write(
                plain_file,
                ["pandoc", "-i", html_file, "--columns=79", "-wrst",
                 "-o", plain_file]):
            return 1
        # work around https://github.com/jgm/pandoc/issues/1656
        # by adding two spaces to all line-block continuation lines
        subprocess.call(["sed", "-i", "-r", "-e",
                         r"/^\|/,/^ |^$/{s/^([^ |])/  \1/g}", plain_file])
    return 0


if __name__ == "__main__":
    sys.exit(main())