Handle excess whitespace in strings

Whitespace between words and tags doesn't matter to HTML. Indeed, the text in a translatable element may be formatted any way over any number of lines, so we normalize all consecutive whitespace to be just one space character and strip whitespace from the beginning and end of the strings. This is so that translators are not confronted with strange newlines and whitespace on Transifex. [endlessm/eos-sdk#3291]
author: Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
committer: Philip Chimento <philip@endlessm.com> 2015-06-18 13:53:01 -0700
commit: 0e2e2b25766b33e98860b59d1363be528477519d (patch)
tree: fa2e6fea86af4db60c885433fd10cfb1d8466cdb /tools
parent: b290a1214d0e78fb0a613b232b15d3e423292e1e (diff)
1 file changed, 4 insertions, 1 deletion
diff --git a/tools/eos-html-extractor b/tools/eos-html-extractor
index f82afa1..ef8fc54 100755
--- a/tools/eos-html-extractor
+++ b/tools/eos-html-extractor
@@ -10,6 +10,9 @@ import sys
 from bs4 import BeautifulSoup
 from html.parser import HTMLParser
 
+def normalize_string(string):
+    return re.sub(r'\s+', ' ', string.strip())
+
 
 # Parser that adds line numbers to the HTML strings that need translating
 class TranslatableHTMLParser(HTMLParser):
@@ -38,7 +41,7 @@ class TranslatableHTMLParser(HTMLParser):
             if comment_line + comment_length == code_line:
                 optional_comment = ' '.join(comment_string.split())
 
-        self.all_translatable_data.append((data.strip(), code_line, optional_comment))
+        self.all_translatable_data.append((normalize_string(data), code_line, optional_comment))
 
     def handle_comment(self, comment):
         self._comments_with_line_numbers.append((comment, self.getpos()[0]))
author	Philip Chimento <philip@endlessm.com>	2015-06-18 13:53:01 -0700
committer	Philip Chimento <philip@endlessm.com>	2015-06-18 13:53:01 -0700
commit	0e2e2b25766b33e98860b59d1363be528477519d (patch)
tree	fa2e6fea86af4db60c885433fd10cfb1d8466cdb /tools
parent	b290a1214d0e78fb0a613b232b15d3e423292e1e (diff)