From 08bb5f5e26a9388b019e1ea4500c49e75b7180c3 Mon Sep 17 00:00:00 2001
From: Ximin Luo
Date: Tue, 30 Sep 2014 02:04:36 +0100
Subject: amo-changelog: fetch all pages of Version History

---
 amo-changelog | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/amo-changelog b/amo-changelog
index 273d6ad..f95359b 100755
--- a/amo-changelog
+++ b/amo-changelog
@@ -33,15 +33,21 @@ HTML_FOOT = "\n"
 def fix_outgoing_href(match):
     return 'href="%s"' % urllib2.unquote(match.group(1))
 
-def convert_rss_to_html(source, target):
+def convert_rss_to_html(first, source, target):
     elements = etree.iterparse(source)
-    # title
-    element = next(elements)[1]
-    while element.tag != "title":
+    next_url = None
+    # header if first page
+    if first:
         element = next(elements)[1]
-    print(HTML_HEAD.format(title=element.text), file=target)
-    # items
+        while element.tag != "title":
+            element = next(elements)[1]
+        print(HTML_HEAD.format(title=element.text), file=target)
+    # items, rel
     for _, element in elements:
+        if element.tag == "{http://www.w3.org/2005/Atom}link":
+            if element.attrib["rel"] == "next":
+                next_url = element.attrib["href"]
+            continue
         if element.tag != "item":
             continue
         title = element.find("title").text.encode("utf-8")
@@ -57,7 +63,10 @@ def convert_rss_to_html(source, target):
         else:
             print("[no description]", file=target)
         print("", file=target)
-    print(HTML_FOOT, file=target)
+    # footer if last page
+    if not next_url:
+        print(HTML_FOOT, file=target)
+    return next_url
 
 def which(cmd):
     path = os.environ.get("PATH", os.defpath).split(os.pathsep)
@@ -110,16 +119,19 @@ def main():
     try:
         with open(html_file, "w") as target:
             url = URL_TEMPLATE.format(ext=options.extension)
-            try:
-                source = urllib2.urlopen(url)
-            except urllib2.HTTPError as error:
-                print("%s: For extension '%s', error fetching '%s': %s" %
-                      (progname, options.extension, url, error), file=sys.stderr)
-                raise
-            try:
-                convert_rss_to_html(source, target)
-            finally:
-                source.close()
+            first = True
+            while url:
+                try:
+                    source = urllib2.urlopen(url)
+                except urllib2.HTTPError as error:
+                    print("%s: For extension '%s', error fetching '%s': %s" %
+                          (progname, options.extension, url, error), file=sys.stderr)
+                    raise
+                try:
+                    url = convert_rss_to_html(first, source, target)
+                    first = False
+                finally:
+                    source.close()
         print("wrote %s" % html_file, file=sys.stderr)
     except Exception as e:
         print("failed to write %s: %s" % (html_file, e), file=sys.stderr)
-- 
cgit v1.2.3