summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
committer Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
commit 0e2e2b25766b33e98860b59d1363be528477519d (patch)
tree fa2e6fea86af4db60c885433fd10cfb1d8466cdb
parent b290a1214d0e78fb0a613b232b15d3e423292e1e (diff)
Handle excess whitespace in strings
Whitespace between words and tags doesn't matter to HTML. Indeed, the text in a translatable element may be formatted any way over any number of lines, so we normalize all consecutive whitespace to be just one space character and strip whitespace from the beginning and end of the strings. This is so that translators are not confronted with strange newlines and whitespace on Transifex. [endlessm/eos-sdk#3291]
-rw-r--r-- test/tools/test.html 4
-rw-r--r-- test/tools/testHtmlExtractor.js 4
-rw-r--r-- test/webhelper/testTranslate2.js 19
-rwxr-xr-x tools/eos-html-extractor 5
-rw-r--r-- webhelper/webextensions/wh2extension.c 41
5 files changed, 63 insertions, 10 deletions
diff --git a/test/tools/test.html b/test/tools/test.html
index 4dfda70..9f7e341 100644
--- a/test/tools/test.html
+++ b/test/tools/test.html
@@ -15,6 +15,10 @@
<nav id="finance-nav">
</nav>
</div>
+ <p name="translatable">
+ This is a string that is spread over multiple lines,
+ but that doesn't matter to HTML.
+ </p>
</section>
</body>
</html>
diff --git a/test/tools/testHtmlExtractor.js b/test/tools/testHtmlExtractor.js
index 4f46706..88215ef 100644
--- a/test/tools/testHtmlExtractor.js
+++ b/test/tools/testHtmlExtractor.js
@@ -7,7 +7,9 @@ _("Finance Builder");\n\
// TRANSLATORS: This is a test of UTF-8 encoded characters\n\
_("My Bü∂get");\n\
#line 13 "test/tools/test.html"\n\
-_("Choose a template");\n';
+_("Choose a template");\n\
+#line 21 "test/tools/test.html"\n\
+_("This is a string that is spread over multiple lines, but that doesn\'t matter to HTML.");\n'
describe('eos-html-extractor', function () {
it('works correctly at a minimum', function () {
diff --git a/test/webhelper/testTranslate2.js b/test/webhelper/testTranslate2.js
index b34c4cf..207f3b5 100644
--- a/test/webhelper/testTranslate2.js
+++ b/test/webhelper/testTranslate2.js
@@ -87,9 +87,10 @@ describe('WebHelper2 translator', function () {
});
describe('translating a page', function () {
- let webview;
+ let webview, gettext_spy;
+ const MINIMAL_HTML = '<p name="translatable">Translate Me</p>';
- function run_loop() {
+ function run_loop(html=MINIMAL_HTML) {
webview.connect('load-changed', (webview, event) => {
if (event === WebKit2.LoadEvent.FINISHED) {
webhelper.translate_html(webview, null, (obj, res) => {
@@ -98,18 +99,17 @@ describe('WebHelper2 translator', function () {
});
}
});
- webview.load_html('<html><body><p name="translatable">Translate Me</p></body></html>',
- null);
+ webview.load_html('<html><body>' + html + '</body></html>', null);
Mainloop.run('webhelper2');
}
beforeEach(function () {
webview = new WebKit2.WebView();
+ gettext_spy = jasmine.createSpy('gettext_spy').and.callFake((s) => s);
+ webhelper.set_gettext(gettext_spy);
});
it('translates a string', function () {
- let gettext_spy = jasmine.createSpy('gettext_spy').and.callFake((s) => s);
- webhelper.set_gettext(gettext_spy);
run_loop();
expect(gettext_spy).toHaveBeenCalledWith('Translate Me');
});
@@ -138,6 +138,13 @@ describe('WebHelper2 translator', function () {
});
webview.load_html('<html><body></body></html>', null);
});
+
+ // The translatable text is padded with newlines; the gettext spy must
+ // still be called with the bare 'Translate Me'.
+ it('normalizes a string before translating it', function () {
+ run_loop('<p name="translatable">\n\
+ Translate Me\n\
+ </p>');
+ expect(gettext_spy).toHaveBeenCalledWith('Translate Me');
+ });
});
describe('used from client-side Javascript', function () {
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor
index f82afa1..ef8fc54 100755
--- a/tools/eos-html-extractor
+++ b/tools/eos-html-extractor
@@ -10,6 +10,9 @@ import sys
from bs4 import BeautifulSoup
from html.parser import HTMLParser
+# Trim both ends of the string and squeeze every run of whitespace
+# (spaces, tabs, newlines) down to a single space character.
+def normalize_string(string):
+    return ' '.join(string.split())
+
# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
@@ -38,7 +41,7 @@ class TranslatableHTMLParser(HTMLParser):
if comment_line + comment_length == code_line:
optional_comment = ' '.join(comment_string.split())
- self.all_translatable_data.append((data.strip(), code_line, optional_comment))
+ self.all_translatable_data.append((normalize_string(data), code_line, optional_comment))
def handle_comment(self, comment):
self._comments_with_line_numbers.append((comment, self.getpos()[0]))
diff --git a/webhelper/webextensions/wh2extension.c b/webhelper/webextensions/wh2extension.c
index 654a304..8848f2e 100644
--- a/webhelper/webextensions/wh2extension.c
+++ b/webhelper/webextensions/wh2extension.c
@@ -232,6 +232,39 @@ ngettext_shim (JSContextRef js,
return retval;
}
+/*
+ * normalize_string:
+ * @string: (nullable): text to normalize
+ *
+ * Strips leading and trailing whitespace from @string and replaces every
+ * internal run of whitespace with a single space character.
+ *
+ * Returns: (transfer full) (nullable): a newly allocated string, to be freed
+ * with g_free(), or %NULL if @string is %NULL. If the whitespace regex could
+ * not be created or the replacement fails, the stripped copy is returned
+ * with its internal whitespace left as it was.
+ */
+static gchar *
+normalize_string (const gchar *string)
+{
+  /* The one-time guard is a gsize, which is the type g_once_init_enter()
+   * is documented to take. It is kept separate from the regex pointer so
+   * that initialization can always be completed with a nonzero value, even
+   * when compiling the regex fails. */
+  static gsize regex_initialized = 0;
+  static GRegex *whitespace = NULL;
+
+  if (g_once_init_enter (&regex_initialized))
+    {
+      GError *regex_error = NULL;
+
+      // Don't free; will persist until exit
+      whitespace = g_regex_new ("\\s+", G_REGEX_OPTIMIZE, 0, &regex_error);
+      if (whitespace == NULL)
+        {
+          g_critical ("Trouble creating regex: %s\n",
+                      regex_error != NULL ? regex_error->message
+                                          : "unknown error");
+          g_clear_error (&regex_error);
+        }
+
+      g_once_init_leave (&regex_initialized, 1);
+    }
+
+  /* Nothing to normalize; avoids handing NULL to the GLib string helpers. */
+  if (string == NULL)
+    return NULL;
+
+  gchar *copy = g_strstrip (g_strdup (string));
+
+  /* Without a usable regex, fall back to the stripped string. */
+  if (whitespace == NULL)
+    return copy;
+
+  GError *error = NULL;
+  gchar *retval = g_regex_replace_literal (whitespace, copy, -1, 0, " ", 0,
+                                           &error);
+  if (retval == NULL)
+    {
+      /* error may be unset if the call failed a precondition check. */
+      g_critical ("Trouble normalizing string: %s\n",
+                  error != NULL ? error->message : "unknown error");
+      g_clear_error (&error);
+      return copy;
+    }
+
+  g_free (copy);
+  return retval;
+}
+
static void
translate_html (WebKitDOMDocument *dom,
Context *ctxt)
@@ -251,7 +284,8 @@ translate_html (WebKitDOMDocument *dom,
{
WebKitDOMHTMLElement *el_html = WEBKIT_DOM_HTML_ELEMENT (element);
gchar *inner_html = webkit_dom_html_element_get_inner_html (el_html);
- gchar *translated_html = translation_function (inner_html, ctxt);
+ gchar *normalized = normalize_string (inner_html);
+ gchar *translated_html = translation_function (normalized, ctxt);
webkit_dom_html_element_set_inner_html (el_html, translated_html,
&error);
if (error != NULL)
@@ -263,11 +297,13 @@ translate_html (WebKitDOMDocument *dom,
g_free (translated_html);
g_free (inner_html);
+ g_free (normalized);
}
else
{
gchar *text = webkit_dom_node_get_text_content (element);
- gchar *translated_text = translation_function (text, ctxt);
+ gchar *normalized = normalize_string (text);
+ gchar *translated_text = translation_function (normalized, ctxt);
webkit_dom_node_set_text_content (element, translated_text, &error);
if (error != NULL)
{
@@ -278,6 +314,7 @@ translate_html (WebKitDOMDocument *dom,
g_free (translated_text);
g_free (text);
+ g_free (normalized);
}
}