summary | refs | log | tree | commit | diff
path: root/tools/eos-html-extractor
diff options
context:
space:
mode:
Diffstat (limited to 'tools/eos-html-extractor')
-rwxr-xr-x  tools/eos-html-extractor  75
1 files changed, 75 insertions, 0 deletions
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor
new file mode 100755
index 0000000..72a3acd
--- /dev/null
+++ b/tools/eos-html-extractor
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+
+# Copyright 2013-2015 Endless Mobile, Inc.
+
+import argparse
+import os.path
+import re
+from bs4 import BeautifulSoup
+from html.parser import HTMLParser
+
+
+# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
    """Locate translatable text nodes and pair them with line numbers.

    The parser is given the set of strings that need translating. While an
    HTML document is fed through it, every text node that exactly matches
    one of those strings is recorded in ``all_translatable_data`` as a
    ``(stripped_text, line_number, comment_or_None)`` tuple. The comment is
    included only when the most recent HTML comment ends on the line
    directly above the line at which the text was reported.
    """

    def __init__(self, translatable_strings):
        """Create a parser that looks for *translatable_strings*.

        translatable_strings -- iterable of the exact text-node contents
                                that should be reported
        """
        # Character references are decoded before handle_data() is called so
        # that text such as "Fish &amp; chips" arrives as one decoded chunk
        # and can be compared against the strings supplied by the caller.
        super().__init__(convert_charrefs=True)
        self.all_translatable_data = []
        self._comments_with_line_numbers = []
        self._translatable_strings = set(translatable_strings)

    def handle_data(self, data):
        """Record *data* if it is one of the translatable strings."""
        if data not in self._translatable_strings:
            return

        text = data.strip()
        if not text:
            # A whitespace-only node can end up in the translatable set
            # (for instance when an element's first child is a newline).
            # Recording it would produce an empty message, so skip it.
            return

        code_line = self.getpos()[0]
        optional_comment = None

        if self._comments_with_line_numbers:
            # Only the most recently seen comment is a candidate
            comment_string, comment_line = self._comments_with_line_numbers[-1]

            # Comment takes up at least one line by default (hence the +1)
            comment_length = comment_string.count('\n') + 1

            # If the comment immediately preceded this string, include it,
            # collapsing all internal whitespace runs to single spaces
            if comment_line + comment_length == code_line:
                optional_comment = ' '.join(comment_string.split())

        self.all_translatable_data.append((text, code_line, optional_comment))

    def handle_comment(self, comment):
        """Remember *comment* together with the line on which it starts."""
        self._comments_with_line_numbers.append((comment, self.getpos()[0]))
+
+parser = argparse.ArgumentParser(description='Extract translatable strings ' +
+ 'from HTML files. This is xgettext for HTML.')
+parser.add_argument('input_file', type=str,
+ help='Input file to scan')
+parser.add_argument('top_srcdir', type=str, nargs='?', default='.',
+ help='Top-level source directory (for printing correct #line directives)')
+args = parser.parse_args()
+
+# Path from current directory to top-level app directory
+html_file = args.input_file
+top_dir = args.top_srcdir
+final_path = os.path.relpath(html_file, top_dir)
+
+# Create the BeautifulSoup HTML-parsing object
+with open(html_file) as f:
+ page = f.read()
+soup = BeautifulSoup(page)
+
+# Extract all translatable strings from that HTML
+translatable_divs = soup.find_all(attrs={'name': 'translatable'})
+translatable_strings = map(lambda div: div.contents[0], translatable_divs)
+
+# Find the line numbers for those strings
+parser = TranslatableHTMLParser(translatable_strings)
+parser.feed(page)
+
+# Write out all info about the translatable strings found in this file
+for string, line_num, optional_comment in parser.all_translatable_data:
+ print('#line {line} "{path}"'.format(line=line_num, path=final_path))
+ if optional_comment:
+ print('// ' + optional_comment)
+ print('_("{string}");'.format(string=string))