diff options
-rw-r--r-- | test/tools/test.html | 1 | ||||
-rw-r--r-- | test/tools/testHtmlExtractor.js | 4 | ||||
-rw-r--r-- | test/webhelper/testTranslate2.js | 5 | ||||
-rwxr-xr-x | tools/eos-html-extractor | 55 |
4 files changed, 50 insertions, 15 deletions
diff --git a/test/tools/test.html b/test/tools/test.html index 6687013..18c07e3 100644 --- a/test/tools/test.html +++ b/test/tools/test.html @@ -20,6 +20,7 @@ but that doesn't matter to HTML. </p> <span name="translatable">String with a "quote"</span> + <span name="translatable">String with<br>embedded <b>tags</b></span> </section> </body> </html> diff --git a/test/tools/testHtmlExtractor.js b/test/tools/testHtmlExtractor.js index 7d622e1..c1bde4e 100644 --- a/test/tools/testHtmlExtractor.js +++ b/test/tools/testHtmlExtractor.js @@ -11,7 +11,9 @@ _("Choose a template");\n\ #line 21 "test/tools/test.html"\n\ _("This is a string that is spread over multiple lines, but that doesn\'t matter to HTML.");\n\ #line 22 "test/tools/test.html"\n\ -_("String with a \\"quote\\"");\n'; +_("String with a \\"quote\\"");\n\ +#line 23 "test/tools/test.html"\n\ +_("String with<br>embedded <b>tags</b>");\n'; describe('eos-html-extractor', function () { it('works correctly at a minimum', function () { diff --git a/test/webhelper/testTranslate2.js b/test/webhelper/testTranslate2.js index 9cb7042..12a18f6 100644 --- a/test/webhelper/testTranslate2.js +++ b/test/webhelper/testTranslate2.js @@ -150,6 +150,11 @@ describe('WebHelper2 translator', function () { run_loop('<p name="translatable">String with "quotes"</p>'); expect(gettext_spy).toHaveBeenCalledWith('String with "quotes"'); }); + + it('handles embedded tags correctly', function () { + run_loop('<p name="translatable">Embedded<br><b>tags</b></p>'); + expect(gettext_spy).toHaveBeenCalledWith('Embedded<br><b>tags</b>'); + }); }); describe('used from client-side Javascript', function () { diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor index c7f87cb..bf50e98 100755 --- a/tools/eos-html-extractor +++ b/tools/eos-html-extractor @@ -7,7 +7,6 @@ import io import os.path import re import sys -from bs4 import BeautifulSoup from html.parser import HTMLParser ESCAPES = str.maketrans({ @@ -20,14 +19,41 @@ def normalize_string(string): # Parser that adds line numbers to the HTML strings that need translating class TranslatableHTMLParser(HTMLParser): - def __init__(self, translatable_strings): + def __init__(self): super().__init__() self.all_translatable_data = [] self._comments_with_line_numbers = [] - self._translatable_strings = set(translatable_strings) + self._current_translatable_tag_level = None + self._opened_tags = [] + self._tag_level = 0 + self._text = '' - def handle_data(self, data): - if data not in self._translatable_strings: + def handle_starttag(self, tag, attrs): + self._tag_level += 1 + self._opened_tags.append(tag) + + if self._current_translatable_tag_level is not None: + self._text += self.get_starttag_text() + return + + if ('name', 'translatable') in attrs: + self._current_translatable_tag_level = self._tag_level + + def handle_endtag(self, tag): + self._tag_level -= 1 + # In non-X HTML, there can be tags that don't close, e.g. <meta>, <br> + while self._opened_tags.pop() != tag: + self._tag_level -= 1 + + if (self._current_translatable_tag_level is not None and + self._current_translatable_tag_level >= self._tag_level + 1): + self._current_translatable_tag_level = None + + if self._current_translatable_tag_level is not None: + self._text += '</' + tag + '>' + return + + if not self._text: return code_line = self.getpos()[0] @@ -45,7 +71,13 @@ class TranslatableHTMLParser(HTMLParser): if comment_line + comment_length == code_line: optional_comment = ' '.join(comment_string.split()) - self.all_translatable_data.append((normalize_string(data), code_line, optional_comment)) + self.all_translatable_data.append((normalize_string(self._text), code_line, optional_comment)) + self._text = '' + + def handle_data(self, data): + if self._current_translatable_tag_level is None: + return + self._text += data def handle_comment(self, comment): self._comments_with_line_numbers.append((comment, self.getpos()[0])) @@ -70,17 +102,12 @@ top_dir = args.top_srcdir final_path = os.path.relpath(html_file, top_dir) out_file = args.output -# Create the BeautifulSoup HTML-parsing object with open(html_file, encoding='utf-8') as f: page = f.read() -soup = BeautifulSoup(page) - -# Extract all translatable strings from that HTML -translatable_divs = soup.find_all(attrs={'name': 'translatable'}) -translatable_strings = map(lambda div: div.contents[0], translatable_divs) -# Find the line numbers for those strings -parser = TranslatableHTMLParser(translatable_strings) +# Extract all translatable strings from the HTML and find the line numbers for +# those strings +parser = TranslatableHTMLParser() parser.feed(page) # Write out all info about the translatable strings found in this file |