diff options
author | Philip Chimento <philip@endlessm.com> | 2015-06-18 13:53:01 -0700 |
---|---|---|
committer | Philip Chimento <philip@endlessm.com> | 2015-06-18 13:53:01 -0700 |
commit | 0e2e2b25766b33e98860b59d1363be528477519d (patch) | |
tree | fa2e6fea86af4db60c885433fd10cfb1d8466cdb /tools | |
parent | b290a1214d0e78fb0a613b232b15d3e423292e1e (diff) |
Handle excess whitespace in strings
Whitespace between words and tags doesn't matter to HTML. Indeed, the
text in a translatable element may be formatted any way over any number
of lines, so we normalize all consecutive whitespace to be just one space
character and strip whitespace from the beginning and end of the strings.
This is so that translators are not confronted with strange newlines and
whitespace on Transifex.
[endlessm/eos-sdk#3291]
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/eos-html-extractor | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor index f82afa1..ef8fc54 100755 --- a/tools/eos-html-extractor +++ b/tools/eos-html-extractor @@ -10,6 +10,9 @@ import sys from bs4 import BeautifulSoup from html.parser import HTMLParser +def normalize_string(string): + return re.sub(r'\s+', ' ', string.strip()) + # Parser that adds line numbers to the HTML strings that need translating class TranslatableHTMLParser(HTMLParser): @@ -38,7 +41,7 @@ class TranslatableHTMLParser(HTMLParser): if comment_line + comment_length == code_line: optional_comment = ' '.join(comment_string.split()) - self.all_translatable_data.append((data.strip(), code_line, optional_comment)) + self.all_translatable_data.append((normalize_string(data), code_line, optional_comment)) def handle_comment(self, comment): self._comments_with_line_numbers.append((comment, self.getpos()[0])) |