#!/usr/bin/env python3

# Copyright 2013-2015 Endless Mobile, Inc.

"""Extract translatable strings from an HTML file (xgettext for HTML).

Elements carrying the attribute ``name="translatable"`` are located, and the
first text node of each one is written out as a ``_("...")`` call preceded by
a ``#line`` directive, so that the result can be fed to xgettext.
"""

import argparse
import io
import os.path
import re
import sys
from html.parser import HTMLParser


def normalize_string(string):
    """Return *string* trimmed, with every whitespace run collapsed to a space."""
    return re.sub(r'\s+', ' ', string.strip())


def escape_c_string(string):
    """Return *string* escaped for use inside a double-quoted C string literal.

    Backslashes are doubled first so that the backslashes introduced while
    escaping the double quotes are not escaped a second time.
    """
    return string.replace('\\', '\\\\').replace('"', '\\"')


# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
    """Record the line number (and preceding comment) of translatable strings.

    After feeding a document, ``all_translatable_data`` holds one
    ``(normalized_string, line_number, comment_or_None)`` tuple for every text
    node whose raw content equals one of *translatable_strings*.
    """

    def __init__(self, translatable_strings):
        super().__init__()
        self.all_translatable_data = []
        self._comments_with_line_numbers = []
        self._translatable_strings = set(translatable_strings)

    def handle_data(self, data):
        """Store *data* if it is one of the translatable strings."""
        if data not in self._translatable_strings:
            return

        code_line = self.getpos()[0]
        optional_comment = None
        if self._comments_with_line_numbers:
            # Only the most recent comment can be adjacent to this string
            comment_string, comment_line = self._comments_with_line_numbers[-1]
            # Comment takes up at least one line by default (hence the +1)
            comment_length = comment_string.count('\n') + 1
            # If the comment immediately preceded this string, include it
            if comment_line + comment_length == code_line:
                optional_comment = ' '.join(comment_string.split())

        self.all_translatable_data.append(
            (normalize_string(data), code_line, optional_comment))

    def handle_comment(self, comment):
        """Remember every comment together with the line it starts on."""
        self._comments_with_line_numbers.append((comment, self.getpos()[0]))


def find_translatable_strings(page):
    """Return the leading text of every ``name="translatable"`` element.

    Elements that are empty, or whose first child is a tag rather than text,
    contribute nothing: there is no text node that could be matched later.
    """
    # Imported here so the rest of this module stays usable without bs4.
    from bs4 import BeautifulSoup

    # Pin the parser to the one TranslatableHTMLParser is built on, instead of
    # letting BeautifulSoup guess: both passes must tokenize the text alike
    # for the exact string comparison in handle_data() to succeed.
    soup = BeautifulSoup(page, 'html.parser')
    strings = []
    for element in soup.find_all(attrs={'name': 'translatable'}):
        if element.contents and isinstance(element.contents[0], str):
            strings.append(element.contents[0])
    return strings


def format_entries(entries, path):
    """Yield the output lines for *entries* found in the file at *path*.

    *entries* is an iterable of ``(string, line_number, comment_or_None)``
    tuples, as collected in ``TranslatableHTMLParser.all_translatable_data``.
    """
    for string, line_num, optional_comment in entries:
        yield '#line {line} "{path}"\n'.format(line=line_num, path=path)
        if optional_comment:
            yield '// {}\n'.format(optional_comment)
        # Escape the string so quotes or backslashes in the HTML text cannot
        # terminate or corrupt the generated string literal.
        yield '_("{string}");\n'.format(string=escape_c_string(string))


def main(argv=None):
    """Run the extractor; *argv* defaults to the process arguments."""
    arg_parser = argparse.ArgumentParser(description='Extract translatable strings ' +
                                         'from HTML files. This is xgettext for HTML.')
    arg_parser.add_argument('input_file', type=str, help='Input file to scan')
    arg_parser.add_argument('top_srcdir', type=str, nargs='?', default='.',
                            help='Top-level source directory (for printing correct #line directives)')
    arg_parser.add_argument('-o', '--output', default=None,
                            type=argparse.FileType('w', encoding='utf-8'),
                            help='File to write (default: stdout)')
    args = arg_parser.parse_args(argv)

    # Path from current directory to top-level app directory
    html_file = args.input_file
    final_path = os.path.relpath(html_file, args.top_srcdir)

    with open(html_file, encoding='utf-8') as f:
        page = f.read()

    # Find the line numbers for the translatable strings
    html_parser = TranslatableHTMLParser(find_translatable_strings(page))
    html_parser.feed(page)
    # Process any text still buffered at the end of the document
    html_parser.close()

    out_file = args.output
    wrapped_stdout = out_file is None
    if wrapped_stdout:
        # Ensure stdout is UTF-8
        out_file = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

    # Write out all info about the translatable strings found in this file
    try:
        out_file.writelines(
            format_entries(html_parser.all_translatable_data, final_path))
    finally:
        if wrapped_stdout:
            # detach() flushes, and keeps the wrapper from closing the real
            # stdout buffer when it is garbage-collected.
            out_file.detach()
        elif out_file is sys.stdout:
            out_file.flush()
        else:
            out_file.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())