summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- test/tools/test.html 1
-rw-r--r-- test/tools/testHtmlExtractor.js 4
-rw-r--r-- test/webhelper/testTranslate2.js 5
-rwxr-xr-x tools/eos-html-extractor 55
4 files changed, 50 insertions, 15 deletions
diff --git a/test/tools/test.html b/test/tools/test.html
index 6687013..18c07e3 100644
--- a/test/tools/test.html
+++ b/test/tools/test.html
@@ -20,6 +20,7 @@
but that doesn't matter to HTML.
</p>
<span name="translatable">String with a "quote"</span>
+ <span name="translatable">String with<br>embedded <b>tags</b></span>
</section>
</body>
</html>
diff --git a/test/tools/testHtmlExtractor.js b/test/tools/testHtmlExtractor.js
index 7d622e1..c1bde4e 100644
--- a/test/tools/testHtmlExtractor.js
+++ b/test/tools/testHtmlExtractor.js
@@ -11,7 +11,9 @@ _("Choose a template");\n\
#line 21 "test/tools/test.html"\n\
_("This is a string that is spread over multiple lines, but that doesn\'t matter to HTML.");\n\
#line 22 "test/tools/test.html"\n\
-_("String with a \\"quote\\"");\n';
+_("String with a \\"quote\\"");\n\
+#line 23 "test/tools/test.html"\n\
+_("String with<br>embedded <b>tags</b>");\n';
describe('eos-html-extractor', function () {
it('works correctly at a minimum', function () {
diff --git a/test/webhelper/testTranslate2.js b/test/webhelper/testTranslate2.js
index 9cb7042..12a18f6 100644
--- a/test/webhelper/testTranslate2.js
+++ b/test/webhelper/testTranslate2.js
@@ -150,6 +150,11 @@ describe('WebHelper2 translator', function () {
run_loop('<p name="translatable">String with "quotes"</p>');
expect(gettext_spy).toHaveBeenCalledWith('String with "quotes"');
});
+
+ it('handles embedded tags correctly', function () {
+ run_loop('<p name="translatable">Embedded<br><b>tags</b></p>');
+ expect(gettext_spy).toHaveBeenCalledWith('Embedded<br><b>tags</b>');
+ });
});
describe('used from client-side Javascript', function () {
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor
index c7f87cb..bf50e98 100755
--- a/tools/eos-html-extractor
+++ b/tools/eos-html-extractor
@@ -7,7 +7,6 @@ import io
import os.path
import re
import sys
-from bs4 import BeautifulSoup
from html.parser import HTMLParser
ESCAPES = str.maketrans({
@@ -20,14 +19,41 @@ def normalize_string(string):
# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
- def __init__(self, translatable_strings):
+ def __init__(self):
super().__init__()
self.all_translatable_data = []
self._comments_with_line_numbers = []
- self._translatable_strings = set(translatable_strings)
+ self._current_translatable_tag_level = None
+ self._opened_tags = []
+ self._tag_level = 0
+ self._text = ''
- def handle_data(self, data):
- if data not in self._translatable_strings:
+ def handle_starttag(self, tag, attrs):
+ self._tag_level += 1
+ self._opened_tags.append(tag)
+
+ if self._current_translatable_tag_level is not None:
+ self._text += self.get_starttag_text()
+ return
+
+ if ('name', 'translatable') in attrs:
+ self._current_translatable_tag_level = self._tag_level
+
+ def handle_endtag(self, tag):
+ self._tag_level -= 1
+ # In non-X HTML, there can be tags that don't close, e.g. <meta>, <br>
+ while self._opened_tags.pop() != tag:
+ self._tag_level -= 1
+
+ if (self._current_translatable_tag_level is not None and
+ self._current_translatable_tag_level >= self._tag_level + 1):
+ self._current_translatable_tag_level = None
+
+ if self._current_translatable_tag_level is not None:
+ self._text += '</' + tag + '>'
+ return
+
+ if not self._text:
return
code_line = self.getpos()[0]
@@ -45,7 +71,13 @@ class TranslatableHTMLParser(HTMLParser):
if comment_line + comment_length == code_line:
optional_comment = ' '.join(comment_string.split())
- self.all_translatable_data.append((normalize_string(data), code_line, optional_comment))
+ self.all_translatable_data.append((normalize_string(self._text), code_line, optional_comment))
+ self._text = ''
+
+ def handle_data(self, data):
+ if self._current_translatable_tag_level is None:
+ return
+ self._text += data
def handle_comment(self, comment):
self._comments_with_line_numbers.append((comment, self.getpos()[0]))
@@ -70,17 +102,12 @@ top_dir = args.top_srcdir
final_path = os.path.relpath(html_file, top_dir)
out_file = args.output
-# Create the BeautifulSoup HTML-parsing object
with open(html_file, encoding='utf-8') as f:
page = f.read()
-soup = BeautifulSoup(page)
-
-# Extract all translatable strings from that HTML
-translatable_divs = soup.find_all(attrs={'name': 'translatable'})
-translatable_strings = map(lambda div: div.contents[0], translatable_divs)
-# Find the line numbers for those strings
-parser = TranslatableHTMLParser(translatable_strings)
+# Extract all translatable strings from the HTML and find the line numbers for
+# those strings
+parser = TranslatableHTMLParser()
parser.feed(page)
# Write out all info about the translatable strings found in this file