#!/usr/bin/env python

# Copyright 2013-2015 Endless Mobile, Inc.

import argparse
import os.path
import re
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser


# Parser that adds line numbers to the HTML strings that need translating
class TranslatableHTMLParser(HTMLParser):
    """Record line numbers (and adjacent comments) for translatable text.

    The parser communicates through three module-level names, which must be
    bound before ``feed()`` is called:

    * ``translatable_strings`` -- collection of text chunks to look for
      (only read here).
    * ``comments_with_line_numbers`` -- list that receives one
      ``(comment_text, line)`` tuple per HTML comment encountered.
    * ``all_translatable_data`` -- list that receives one
      ``(stripped_text, line, comment)`` tuple per matching text chunk.
    """

    def handle_data(self, data):
        """Record ``data`` if it is one of the translatable strings.

        The recorded comment is the most recently seen HTML comment with its
        whitespace collapsed to single spaces, but only when that comment
        ends on the line directly above the string; otherwise it is ``''``.
        """
        if data not in translatable_strings:
            return

        code_line = self.getpos()[0]
        optional_comment = ''

        # A translatable string may appear before any comment has been seen,
        # in which case there is simply no comment to attach.
        if comments_with_line_numbers:
            comment_string, comment_line = comments_with_line_numbers[-1]

            # Comment takes up at least one line by default (hence the +1)
            comment_length = comment_string.count('\n') + 1

            # If the comment immediately preceded this string, include it
            if comment_line + comment_length == code_line:
                optional_comment = ' '.join(comment_string.split())

        all_translatable_data.append(
            (data.strip(), code_line, optional_comment))

    def handle_comment(self, comment):
        """Remember the comment text together with the line it starts on."""
        comments_with_line_numbers.append((comment, self.getpos()[0]))

# Command-line interface: the HTML file to scan, plus an optional top-level
# source directory (defaults to the current directory)
parser = argparse.ArgumentParser(
    description='Extract translatable strings '
                'from HTML files. This is xgettext for HTML.')
parser.add_argument('input_file', type=str, help='Input file to scan')
parser.add_argument(
    'top_srcdir', type=str, nargs='?', default='.',
    help='Top-level source directory (for printing correct #line directives)')
args = parser.parse_args()

# Path of the input file expressed relative to the top-level source directory
html_file, top_dir = args.input_file, args.top_srcdir
final_path = os.path.relpath(html_file, top_dir)

# Read the whole document once; the same text is handed to both parsers
with open(html_file) as f:
    page = f.read()

# Create the BeautifulSoup HTML-parsing object. The parser is named
# explicitly so the result does not depend on which optional parser
# libraries happen to be installed on the machine running this script.
soup = BeautifulSoup(page, 'html.parser')

# Extract all translatable strings from that HTML: the first child of every
# element carrying name="translatable". Elements with no children have
# nothing to translate and are skipped (indexing them would raise IndexError).
translatable_divs = soup.find_all(attrs={'name': 'translatable'})
translatable_strings = [div.contents[0].encode('utf-8')
                        for div in translatable_divs
                        if div.contents]

# Find the line numbers for those strings. TranslatableHTMLParser fills the
# two lists below while it is fed the page. Note that `parser` is rebound
# here, replacing the argparse parser created earlier in the script.
all_translatable_data = []
comments_with_line_numbers = []
parser = TranslatableHTMLParser()
parser.feed(page)

def _escape_c_string(text):
    """Return ``text`` escaped for use inside a double-quoted C string."""
    # Backslashes go first so the escapes added afterwards are not doubled
    return (text.replace('\\', '\\\\')
                .replace('"', '\\"')
                .replace('\r', '\\r')
                .replace('\n', '\\n'))


# Write out all info about the translatable strings found in this file, as
# C-like source: a #line directive, an optional // comment, and the string
# wrapped in _("..."). The string is escaped so that quotes, backslashes and
# line breaks inside it cannot produce a malformed string literal.
for string, line_num, optional_comment in all_translatable_data:
    print('#line {line} "{path}"'.format(line=line_num, path=final_path))
    if optional_comment:
        print('// ' + optional_comment)
    print('_("{string}");'.format(string=_escape_c_string(string)))