summary | refs | log | tree | commit | diff
path: root/tools
diff options
context:
space:
mode:
author: Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
committer: Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
commit: 0e2e2b25766b33e98860b59d1363be528477519d (patch)
tree: fa2e6fea86af4db60c885433fd10cfb1d8466cdb /tools
parent: b290a1214d0e78fb0a613b232b15d3e423292e1e (diff)
Handle excess whitespace in strings
Whitespace between words and tags doesn't matter to HTML. Indeed, the text in a translatable element may be formatted any way over any number of lines, so we normalize all consecutive whitespace to be just one space character and strip whitespace from the beginning and end of the strings. This is so that translators are not confronted with strange newlines and whitespace on Transifex. [endlessm/eos-sdk#3291]
Diffstat (limited to 'tools')
-rwxr-xr-x tools/eos-html-extractor 5
1 files changed, 4 insertions, 1 deletions
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor
index f82afa1..ef8fc54 100755
--- a/tools/eos-html-extractor
+++ b/tools/eos-html-extractor
@@ -10,6 +10,9 @@ import sys
from bs4 import BeautifulSoup
from html.parser import HTMLParser
+def normalize_string(string):
+ return re.sub(r'\s+', ' ', string.strip())
+
# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
@@ -38,7 +41,7 @@ class TranslatableHTMLParser(HTMLParser):
if comment_line + comment_length == code_line:
optional_comment = ' '.join(comment_string.split())
- self.all_translatable_data.append((data.strip(), code_line, optional_comment))
+ self.all_translatable_data.append((normalize_string(data), code_line, optional_comment))
def handle_comment(self, comment):
self._comments_with_line_numbers.append((comment, self.getpos()[0]))