diff options
Diffstat (limited to 'tools/eos-html-extractor')
-rwxr-xr-x | tools/eos-html-extractor | 75 |
1 files changed, 75 insertions, 0 deletions
#!/usr/bin/env python3

# Copyright 2013-2015 Endless Mobile, Inc.

"""Extract translatable strings from an HTML file (xgettext for HTML).

Every element carrying the attribute ``name="translatable"`` contributes its
first text node. For each such string the tool prints a ``#line`` directive,
an optional ``//`` translator comment and a ``_("...")`` call, so the output
can be fed to xgettext as if it were C-like source code.
"""

import argparse
import os.path
import re
from html.parser import HTMLParser


# Characters that cannot appear verbatim inside a double-quoted C string
# literal, mapped to their escape sequences.
_C_ESCAPES = {
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
_C_ESCAPE_RE = re.compile(r'[\\"\n\r\t]')


def escape_c_string(text):
    """Return *text* escaped for use inside a double-quoted C string literal."""
    return _C_ESCAPE_RE.sub(lambda match: _C_ESCAPES[match.group()], text)


# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
    """Locate known translatable strings in HTML and record their lines.

    After ``feed()``, ``all_translatable_data`` holds one
    ``(string, line_number, comment_or_None)`` tuple per occurrence, in
    document order. ``string`` is the text with surrounding whitespace
    stripped; the comment is the HTML comment that ends on the line directly
    above the string, with its whitespace collapsed to single spaces.
    """

    def __init__(self, translatable_strings):
        """Create a parser that reports only *translatable_strings*.

        translatable_strings -- iterable of the exact text nodes to look for.
        """
        super().__init__()
        self.all_translatable_data = []
        self._comments_with_line_numbers = []
        self._translatable_strings = set(translatable_strings)

    def handle_data(self, data):
        """Record *data* if it is one of the strings being looked for."""
        if data not in self._translatable_strings:
            return

        text = data.strip()
        if not text:
            # An empty msgid is reserved by gettext for the catalogue header,
            # so whitespace-only strings must never be emitted.
            return

        code_line = self.getpos()[0]
        optional_comment = None

        if self._comments_with_line_numbers:
            # Only the most recent comment can immediately precede the string
            comment_string, comment_line = self._comments_with_line_numbers[-1]

            # Comment takes up at least one line by default (hence the +1)
            comment_length = comment_string.count('\n') + 1

            # If the comment immediately preceded this string, include it
            if comment_line + comment_length == code_line:
                optional_comment = ' '.join(comment_string.split())

        self.all_translatable_data.append((text, code_line, optional_comment))

    def handle_comment(self, comment):
        """Remember every comment together with the line it starts on."""
        self._comments_with_line_numbers.append((comment, self.getpos()[0]))


def format_translatable_data(translatable_data, path):
    """Render extracted strings as xgettext-readable pseudo source lines.

    translatable_data -- iterable of ``(string, line, comment_or_None)``.
    path -- file name to report in the ``#line`` directives.

    Returns a list of output lines without trailing newlines. Strings and the
    path are escaped so that quotes, backslashes and line breaks in the HTML
    cannot produce a malformed string literal.
    """
    escaped_path = escape_c_string(path)
    lines = []
    for string, line_num, optional_comment in translatable_data:
        lines.append('#line {line} "{path}"'.format(line=line_num,
                                                    path=escaped_path))
        if optional_comment:
            lines.append('// ' + optional_comment)
        lines.append('_("{string}");'.format(string=escape_c_string(string)))
    return lines


def main(argv=None):
    """Run the extractor; *argv* defaults to the process command line."""
    # Imported here so the helpers above stay importable (for example from
    # unit tests) on machines where BeautifulSoup is not installed.
    from bs4 import BeautifulSoup

    arg_parser = argparse.ArgumentParser(
        description='Extract translatable strings '
                    'from HTML files. This is xgettext for HTML.')
    arg_parser.add_argument('input_file', type=str,
                            help='Input file to scan')
    arg_parser.add_argument('top_srcdir', type=str, nargs='?', default='.',
                            help='Top-level source directory (for printing '
                                 'correct #line directives)')
    args = arg_parser.parse_args(argv)

    # Path from current directory to top-level app directory
    html_file = args.input_file
    final_path = os.path.relpath(html_file, args.top_srcdir)

    with open(html_file, encoding='utf-8') as f:
        page = f.read()

    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed, and results differ between machines. 'html.parser' is
    # also what the line-number pass below is built on.
    soup = BeautifulSoup(page, 'html.parser')

    # Extract all translatable strings from that HTML; empty elements have no
    # first child and are skipped.
    translatable_strings = [
        element.contents[0]
        for element in soup.find_all(attrs={'name': 'translatable'})
        if element.contents
    ]

    # Find the line numbers for those strings
    html_parser = TranslatableHTMLParser(translatable_strings)
    html_parser.feed(page)

    # Write out all info about the translatable strings found in this file
    for line in format_translatable_data(html_parser.all_translatable_data,
                                         final_path):
        print(line)


if __name__ == '__main__':
    main()