diff options
author | Philip Chimento <philip@endlessm.com> | 2015-06-04 09:20:31 -0700 |
---|---|---|
committer | Philip Chimento <philip@endlessm.com> | 2015-06-04 14:27:31 -0700 |
commit | f17a6ff5c41215701b822ccc4d46e89832fbd033 (patch) | |
tree | 32e80569771061e8948903002c694dafc3514ea1 /tools/eos-html-extractor | |
parent | fbc49cb284067838416c6022d5c7dcb64899e030 (diff) |
Add eos-html-extractor and m4 file
This is taken almost directly from the existing version in eos-english.
Cleanups to follow in subsequent commits. Previously the m4 code was in
two separate macros, but since they were much the same, I combined them
into one macro.
This also adds a very minimal test for eos-html-extractor; basically as
a very quick regression test for the cleanups to follow.
[endlessm/eos-sdk#3245]
Diffstat (limited to 'tools/eos-html-extractor')
-rwxr-xr-x | tools/eos-html-extractor | 64 |
1 files changed, 64 insertions, 0 deletions
#!/usr/bin/env python
# File: tools/eos-html-extractor (new file, mode 100755)
'''
Print the translatable strings of an HTML file as _("...") calls.

Every element carrying the attribute name="translatable" contributes its
leading text node.  For each such string three kinds of lines may be
written to standard output:

    #line <line-number> "<path relative to top-srcdir>"
    // <HTML comment that ended on the line just above the string>
    _("<string>");

The "//" line is only written when such a comment exists.

Created on July 19, 2013

@author: Sebastian
'''
# This scraper depends on the BeautifulSoup4 module, make sure
# it's installed by running the following:
# apt-get install python-bs4
import os
import sys

try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2

# The text type of the running interpreter (unicode on 2, str on 3).
_TEXT_TYPE = type(u"")


class TranslatableHTMLParser(HTMLParser):
    '''Parser that adds line numbers to the HTML strings that need translating.

    translatable_strings is an iterable of the exact (unstripped) text nodes
    to look for.  After feed(), all_translatable_data holds one
    (stripped_string, line_number, comment) tuple per match, in document
    order; comment is "" when no comment immediately preceded the string.
    '''

    def __init__(self, translatable_strings=()):
        # HTMLParser is an old-style class on Python 2, so no super() here.
        HTMLParser.__init__(self)
        self.translatable_strings = frozenset(translatable_strings)
        self.all_translatable_data = []
        # (comment_text, starting_line) of the most recent comment, if any.
        self._last_comment = None

    def handle_data(self, data):
        if data not in self.translatable_strings:
            return
        code_line = self.getpos()[0]
        optional_comment = ""
        # A string may well appear before any comment has been seen; the
        # comment is optional, so that must not be an error.
        if self._last_comment is not None:
            comment_string, comment_line = self._last_comment
            # Comment takes up at least one line by default (hence the +1)
            comment_length = comment_string.count("\n") + 1
            # If the comment immediately preceded this string, include it
            if comment_line + comment_length == code_line:
                optional_comment = " ".join(comment_string.split())
        self.all_translatable_data.append(
            (data.strip(), code_line, optional_comment))

    def handle_comment(self, comment):
        self._last_comment = (comment, self.getpos()[0])


def find_translatable_entries(markup, translatable_strings):
    '''Return (string, line_number, comment) tuples for the given markup.

    markup is the HTML document as text; translatable_strings are the text
    nodes to locate in it.
    '''
    parser = TranslatableHTMLParser(translatable_strings)
    parser.feed(markup)
    return parser.all_translatable_data


def escape_string(string):
    '''Escape backslashes and double quotes for use inside "..." quotes.'''
    return string.replace("\\", "\\\\").replace('"', '\\"')


def format_entries(entries, path):
    '''Return the output lines (without newlines) describing the entries.

    entries is a sequence of (string, line_number, comment) tuples and path
    is the file name to report in the #line directives.
    '''
    lines = []
    for string, line_num, optional_comment in entries:
        lines.append('#line %d "%s"' % (line_num, path))
        if optional_comment:
            lines.append("// " + optional_comment)
        lines.append('_("%s");' % escape_string(string))
    return lines


def main(argv):
    '''Run the extractor; argv is [program, input-file, top-srcdir].

    Returns the process exit status: 1 on wrong usage, 0 otherwise.
    '''
    # Ensure proper usage
    if len(argv) != 3:
        print("Usage:")
        print(" html_extractor.py <input-file> <top-srcdir>")
        return 1

    # Imported here so that the helpers above stay usable without bs4.
    from bs4 import BeautifulSoup

    # Path from current directory to top-level app directory
    html_file = argv[1]
    top_dir = argv[2]
    final_path = os.path.relpath(html_file, top_dir)
    if isinstance(final_path, bytes):
        # Python 2 hands over byte strings; the output is assembled as text.
        final_path = final_path.decode(
            sys.getfilesystemencoding() or "utf-8", "replace")

    # Create the BeautifulSoup HTML-parsing object
    with open(html_file, "rb") as html:
        page = html.read()
    soup = BeautifulSoup(page)

    # Extract all translatable strings from that HTML.  Only a leading text
    # node can ever equal the data seen by the line-number parser, so empty
    # elements and elements starting with a child tag are skipped.
    translatable_divs = soup.find_all(attrs={"name": "translatable"})
    translatable_strings = [
        _TEXT_TYPE(div.contents[0])
        for div in translatable_divs
        if div.contents and isinstance(div.contents[0], _TEXT_TYPE)
    ]

    # Find the line numbers for those strings.  The parser gets the same
    # text BeautifulSoup worked on, so both sides compare text with text.
    markup = page.decode(soup.original_encoding or "utf-8", "replace")
    entries = find_translatable_entries(markup, translatable_strings)

    # Write out all info about the translatable strings found in this file,
    # as UTF-8 regardless of the locale (stdout is typically a pipe).
    out = getattr(sys.stdout, "buffer", sys.stdout)
    for line in format_entries(entries, final_path):
        out.write((line + "\n").encode("utf-8"))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))