summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ximin Luo <infinity0@pwned.gg> 2014-09-30 01:59:25 +0100
committer: Ximin Luo <infinity0@pwned.gg> 2014-12-17 10:29:11 +0100
commit: 003eca75448abf77413b93b3c8c3cf56f48cee34 (patch)
tree: 88cbbb18617fefeac7c4b77c1eb1817f28e820c6
parent: bc9f279909ac9c7e47f2b4599630565fe03230ec (diff)
amo-changelog: parse into human-readable form and save to debian/upstream/changelog{,.html}
-rwxr-xr-x amo-changelog 142
-rw-r--r-- man/amo-changelog.1 16
2 files changed, 132 insertions, 26 deletions
diff --git a/amo-changelog b/amo-changelog
index 4ce9e73..273d6ad 100755
--- a/amo-changelog
+++ b/amo-changelog
@@ -19,43 +19,141 @@ from __future__ import print_function
import argparse
import os
+import re
+import subprocess
import sys
import urllib2
import xml.etree.cElementTree as etree
URL_TEMPLATE = "https://addons.mozilla.org/en-US/addon/{ext}/versions/format:rss"
+OUTGOING_HREF = re.compile(r'href="https?://outgoing\.mozilla\.org/v\d+/\w+/(.+?)"')
+HTML_HEAD = "<html>\n<head><title>{title}</title></head>\n<body>\n"
+HTML_FOOT = "</body>\n</html>"
+def fix_outgoing_href(match):
+ return 'href="%s"' % urllib2.unquote(match.group(1))
+
+def convert_rss_to_html(source, target):
+ elements = etree.iterparse(source)
+ # title
+ element = next(elements)[1]
+ while element.tag != "title":
+ element = next(elements)[1]
+ print(HTML_HEAD.format(title=element.text), file=target)
+ # items
+ for _, element in elements:
+ if element.tag != "item":
+ continue
+ title = element.find("title").text.encode("utf-8")
+ print("<h2>%s</h2>" % title, file=target)
+ descel = element.find("description")
+ if descel is not None and descel.text:
+ desc = descel.text.rstrip("\n").encode("utf-8")
+ # process manual line breaks, e.g. adblock-plus
+ desc = desc.replace("\n", "\n<br/>").replace("<br/>\n", "<br/>&nbsp;\n")
+ # strip outgoing redirect
+ desc = OUTGOING_HREF.sub(fix_outgoing_href, desc)
+ print(desc, file=target)
+ else:
+ print("[no description]", file=target)
+ print("", file=target)
+ print(HTML_FOOT, file=target)
+
+def which(cmd):
+ path = os.environ.get("PATH", os.defpath).split(os.pathsep)
+ for dir in path:
+ name = os.path.join(dir, cmd)
+ if (os.path.exists(name) and os.access(name, os.F_OK | os.X_OK)
+ and not os.path.isdir(name)):
+ return name
+ return None
+
+def try_external_write(out, args, **kwargs):
+ prog = args[0]
+ if not which(prog):
+ print("failed to write %s: program not found: %s" % (out, prog), file=sys.stderr)
+ return False
+ try:
+ subprocess.check_call(args, **kwargs)
+ print("wrote %s" % out, file=sys.stderr)
+ return True
+ except Exception as e:
+ print("failed to write %s: %s" % (out, e), file=sys.stderr)
+ return False
def main():
parser = argparse.ArgumentParser(
- description="fetch Version History of an addon from the Mozilla Extensions website.")
+ description="Fetch Version History of an addon from the Mozilla "
+ "Extensions website and convert it into a human-readable format.")
parser.add_argument("extension",
- help="Extension short-name, as used on addons.mozilla.org.")
+ help="Extension short-name, as used on addons.mozilla.org.")
+ parser.add_argument("-f", "--html-file",
+ metavar="FILE", default="debian/upstream/changelog.html",
+ help="File to write to. Default: %(default)s.")
+ parser.add_argument("-p", "--plain-format", metavar="FORMAT",
+ choices=["text", "markdown", "rst"], default="none",
+ help="Generate a human-readable form of the changelog in the file "
+ "without the .html extension, using an external program. Possible "
+ "options are text (uses lynx(1)), markdown (pandoc(1)), or rst "
+ "(pandoc(1)). Default: %(default)s.")
options = parser.parse_args()
- url = URL_TEMPLATE.format(ext=options.extension)
- try:
- fp = urllib2.urlopen(url)
- except urllib2.HTTPError as error:
- print("%s: For extension '%s', error fetching '%s': %s" %
- (os.path.basename(sys.argv[0]), options.extension, url, error),
- file=sys.stderr)
+ progname = os.path.basename(sys.argv[0])
+
+ html_file = options.html_file
+ if not html_file.endswith(".html"):
+ print("%s: Output filename must end with .html: %s" %
+ (progname, html_file), file=sys.stderr)
return 1
+ plain_file = html_file[:-5]
+
try:
- for _, element in etree.iterparse(fp):
- if element.tag != "item":
- continue
- title = element.find("title").text.encode("utf-8")
- print(title)
- print("=" * len(title))
- descel = element.find("description")
- if descel is not None and descel.text:
- print(descel.text.rstrip("\n").encode("utf-8"))
+ with open(html_file, "w") as target:
+ url = URL_TEMPLATE.format(ext=options.extension)
+ try:
+ source = urllib2.urlopen(url)
+ except urllib2.HTTPError as error:
+ print("%s: For extension '%s', error fetching '%s': %s" %
+ (progname, options.extension, url, error), file=sys.stderr)
+ raise
+ try:
+ convert_rss_to_html(source, target)
+ finally:
+ source.close()
+ print("wrote %s" % html_file, file=sys.stderr)
+ except Exception as e:
+ print("failed to write %s: %s" % (html_file, e), file=sys.stderr)
+ #os.remove(html_file)
+ return 1
+
+ if options.plain_format == "text":
+ with open(plain_file, "w") as target:
+ if not try_external_write(plain_file,
+ ["lynx", "-dump", "-list_inline", "-width=84", html_file], stdout=target):
+ #os.remove(plain_file)
+ return 1
else:
- print("[no description]")
- print("")
- finally:
- fp.close()
+ # 2 space indent is a bit more reasonable than lynx's 3 default
+ # width=84 above (3*2-2) effectively cancels the right margin
+ subprocess.call(["sed", "-i", "-e", "s/^ / /g", plain_file])
+
+ elif options.plain_format == "markdown":
+ if not try_external_write(plain_file,
+ ["pandoc", "-i", html_file, "--columns=79", "-wmarkdown", "-o", plain_file]):
+ return 1
+
+ elif options.plain_format == "rst":
+ if not try_external_write(plain_file,
+ ["pandoc", "-i", html_file, "--columns=79", "-wrst", "-o", plain_file]):
+ return 1
+ else:
+ # work around https://github.com/jgm/pandoc/issues/1656
+ # by adding two spaces to all line-block continuation lines
+ subprocess.call(["sed", "-i", "-r",
+ "-e", r"/^\|/,/^ |^$/{s/^([^ |])/ \1/g}", plain_file])
+
+ return 0
+
if __name__ == "__main__":
sys.exit(main())
diff --git a/man/amo-changelog.1 b/man/amo-changelog.1
index 197ff17..09ab68e 100644
--- a/man/amo-changelog.1
+++ b/man/amo-changelog.1
@@ -12,7 +12,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.TH XPI-REPACK "1" "April 2014" "amo-changelog" "mozilla-devscripts suite"
+.TH AMO-CHANGELOG "1" "December 2014" "amo-changelog" "mozilla-devscripts suite"
.SH NAME
amo-changelog \- fetch Version History of an addon
.SH SYNOPSIS
@@ -37,20 +37,28 @@ Here is an example for debian/rules:
.br
override_dh_installchangelogs:
.br
- dh_installchangelogs debian/changelog.upstream
+ dh_installchangelogs debian/upstream/changelog.html debian/upstream/changelog
\[char46]PHONY: get-orig-changelog
.br
get-orig-changelog:
.br
- amo-changelog adblock-plus > debian/changelog.upstream
+ amo-changelog -p rst adblock-plus
-Using this approach, one would save the output file (debian/changelog.upstream)
+Using this approach, one would save the output files debian/upstream/{changelog.html,changelog}
as part of the Debian packaging. When updating the package with a new upstream
release, one would run `debian/rules get-orig-changelog`
.SH OPTIONS
.TP
\fB\-h\fR, \fB\-\-help\fR
Display a brief help message.
+.TP
+\fB\-f\fR, \fB\-\-html\-file\fR
+File to write to. Default: debian/upstream/changelog.html
+.TP
+\fB\-p\fR, \fB\-\-plain\-format\fR
+Generate a human-readable form of the changelog in the file without the .html
+extension, using an external program. Possible options are text (uses lynx(1)),
+markdown (pandoc(1)), or rst (pandoc(1)). Default: none.
.SH AUTHOR
Jakub Wilk <jwilk@debian.org> and Ximin Luo <infinity0@pwned.gg>