diff options
Diffstat (limited to 'tools/eos-html-extractor')
-rwxr-xr-x | tools/eos-html-extractor | 58 |
1 files changed, 30 insertions, 28 deletions
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor index 84a0062..64800c9 100755 --- a/tools/eos-html-extractor +++ b/tools/eos-html-extractor @@ -1,36 +1,37 @@ #!/usr/bin/env python -''' -Created on July 19, 2013 -@author: Sebastian -''' -# This scraper depends on the BeautifulSoup4 module, make sure -# it's installed by running the following: -# apt-get install python-bs4 +# Copyright 2013-2015 Endless Mobile, Inc. + import argparse -import os, re, urllib +import os +import re +import urllib from bs4 import BeautifulSoup from HTMLParser import HTMLParser + # Parser that adds line numbers to the HTML strings that need translating class TranslatableHTMLParser(HTMLParser): def handle_data(self, data): - if data in translatable_strings: - # Determine if comment should be included - most_recent_comment = comments_with_line_numbers[(len(comments_with_line_numbers))-1] - comment_string = most_recent_comment[0] - comment_line = most_recent_comment[1] - code_line = HTMLParser.getpos(self)[0] - # Comment takes up at least one line by default (hence the +1) - comment_length = len(re.findall(r"\n", comment_string)) + 1 - optional_comment = "" - # If the comment immediately preceded this string, include it - if comment_line + comment_length == code_line: - optional_comment = " ".join(comment_string.split()) - all_translatable_data.append((data.strip(), code_line, optional_comment)) + if data not in translatable_strings: + return + + # Determine if comment should be included + most_recent_comment = comments_with_line_numbers[-1] + comment_string, comment_line = most_recent_comment + code_line = self.getpos()[0] + + # Comment takes up at least one line by default (hence the +1) + comment_length = len(re.findall(r'\n', comment_string)) + 1 + optional_comment = '' + + # If the comment immediately preceded this string, include it + if comment_line + comment_length == code_line: + optional_comment = ' '.join(comment_string.split()) + all_translatable_data.append((data.strip(), code_line, optional_comment)) def handle_comment(self, comment): - comments_with_line_numbers.append((comment, HTMLParser.getpos(self)[0])) + comments_with_line_numbers.append((comment, self.getpos()[0])) parser = argparse.ArgumentParser(description='Extract translatable strings ' + 'from HTML files. This is xgettext for HTML.') @@ -50,8 +51,9 @@ page = urllib.urlopen(urllib.pathname2url(html_file)).read() soup = BeautifulSoup(page) # Extract all translatable strings from that HTML -translatable_divs = soup.find_all(attrs={"name" : "translatable"}) -translatable_strings = map(lambda div: div.contents[0].encode('utf-8'), translatable_divs) +translatable_divs = soup.find_all(attrs={'name': 'translatable'}) +translatable_strings = map(lambda div: div.contents[0].encode('utf-8'), + translatable_divs) # Find the line numbers for those strings all_translatable_data = [] @@ -61,7 +63,7 @@ parser.feed(page) # Write out all info about the translatable strings found in this file for string, line_num, optional_comment in all_translatable_data: - print ("#line " + str(line_num) + " \"" + final_path + "\"") - if optional_comment != "": - print ("// " + optional_comment) - print ("_(\"" + string + "\");") + print('#line {line} "{path}"'.format(line=line_num, path=final_path)) + if optional_comment != '': + print('// ' + optional_comment) + print('_("{string}");'.format(string=string)) |