csd2docbook.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

# -*- coding: utf-8 -*-
#
# Copyright (C) 2007 François Pinot
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

from __future__ import print_function
import glob, re
from pygments import highlight
from pygments.formatter import Formatter
from pygments.lexers import CsoundDocumentLexer, CsoundOrchestraLexer, CsoundScoreLexer, _csound_builtins
from pygments.token import Token, STANDARD_TYPES
from xml.sax.saxutils import escape


# FluidSynth opcodes are not installed with Csound 6.11 and later on macOS, and
# they were removed from _csound_builtins.OPCODES at
# https://bitbucket.org/birkenfeld/pygments-main/diff/pygments/lexers/_csound_builtins.py?diff2=1d8eed62b214
opcodeNames = [
    'fluidAllOut',
    'fluidCCi',
    'fluidCCk',
    'fluidControl',
    'fluidEngine',
    'fluidInfo',
    'fluidLoad',
    'fluidNote',
    'fluidOut',
    'fluidProgramSelect',
    'fluidSetInterpMethod',
]
for opcodeName in opcodeNames:
    # Add each missing name to the Pygments opcode collection; a name that is
    # already present is only reported.
    if opcodeName not in _csound_builtins.OPCODES:
        _csound_builtins.OPCODES.add(opcodeName)
        continue
    print('_csound_builtins.OPCODES already contains ‘' + opcodeName + '’')


# To match the syntax highlighting from
# https://github.com/csound/manual/tree/c1b097bae66e04c2b11395f12a03f0d67fc1f059
# as closely as possible, change the token type of score statements from Keyword
# to Name.Builtin so that they look like opcodes. With Pygments 2.3 and later,
# this doesn’t apply to m statements (https://csound.com/docs/manual/m.html),
# n statements (https://csound.com/docs/manual/n.html), or p symbols
# (https://csound.com/docs/manual/ScoreNextp.html); see
# https://bitbucket.org/birkenfeld/pygments-main/commits/b60b37f36a03802e0c5ed412268d37bbc55eb9fb#Lpygments/lexers/csound.pyT159
rootRules = CsoundScoreLexer.tokens['root']
stateTuple = rootRules[3]
# Keep the first element and everything from index 2 onward; only the element
# at index 1 (the token type) is swapped for Name.Builtin.
rootRules[3] = stateTuple[:1] + (Token.Name.Builtin,) + stateTuple[2:]


# See http://pygments.org/docs/formatterdevelopment/.
class DocBookFormatter(Formatter):
    """Pygments formatter that writes tokens as a DocBook programlisting.

    Consecutive tokens that share the same short type name (looked up in
    STANDARD_TYPES) are merged into one run. A run with an empty short type
    name is written as escaped text; any other run is wrapped in an
    <emphasis role="..."> element whose role is the short type name.
    """

    # Matches Keyword values that are treated as Keyword.Declaration.
    _declarationKeywordRegex = re.compile('end(?:in|op)|instr|opcode')
    # Matches a Csound type sigil, optionally preceded by g.
    _typeSigilRegex = re.compile('g?[afikSw]')

    def format(self, tokensource, outfile):
        """Write every token from tokensource to outfile.

        tokensource is iterated as (token type, value) pairs; outfile only
        needs a write method. The output is wrapped in a programlisting
        element.
        """
        outfile.write('<programlisting>\n')

        currentTypeString = ''
        currentValue = ''
        for ttype, value in tokensource:
            typeString = STANDARD_TYPES[ttype]
            if typeString == currentTypeString:
                # Same short type name as the pending run, so extend the run.
                currentValue += value
            else:
                # The type changed: write the pending run, then start a new
                # run with this token.
                adjusted = self._adjustTypeString(currentTypeString, currentValue, typeString)
                self.writeToken(adjusted, currentValue, outfile)
                currentTypeString = typeString
                currentValue = value

        # Write the last pending run through the same adjustment as every
        # other run. There is no following token, so None is passed as the
        # next type and the type-sigil rule cannot apply.
        adjusted = self._adjustTypeString(currentTypeString, currentValue, None)
        self.writeToken(adjusted, currentValue, outfile)

        outfile.write('</programlisting>\n')

    def _adjustTypeString(self, typeString, value, nextTypeString):
        """Return the short type name under which a finished run is written.

        typeString and value describe the finished run; nextTypeString is the
        short type name of the token that follows it, or None at the end of
        the token stream.
        """
        if typeString == 'k' and self._declarationKeywordRegex.match(value):
            # If the run is a Keyword and is one of endin, endop, instr, or
            # opcode, treat it as a Keyword.Declaration token. This is only
            # necessary with Pygments 2.1 through 2.2. In Pygments 2.3 and
            # later, endin, endop, instr, and opcode are Keyword.Declaration
            # tokens; see
            # https://bitbucket.org/birkenfeld/pygments-main/commits/b60b37f36a03802e0c5ed412268d37bbc55eb9fb#Lpygments/lexers/csound.pyT249
            return 'kd'
        if typeString == 'kt' and self._typeSigilRegex.match(value) and nextTypeString == 'n':
            # If the run is a Keyword.Type, has a value that matches a Csound
            # type sigil, and is followed immediately by a name, then it’s a
            # type sigil. Treat it as a Text token.
            return ''
        return typeString

    def writeToken(self, typeString, value, outfile):
        """Write value to outfile, XML-escaped, under the given type name.

        typeString is the short type name used as the emphasis role; an empty
        typeString means the value is written without any wrapping element.
        """
        escapedValue = escape(value)
        # Don’t wrap Text tokens in emphasis elements.
        if typeString == '':
            outfile.write(escapedValue)
        else:
            outfile.write('<emphasis role="' + typeString + '">' + escapedValue + '</emphasis>')


# Highlight every CSD file in examples/ and write the result, wrapped in a
# refsect1 element, to a file of the same name plus '.xml' in examples-xml/.
for path in glob.glob('examples/*.csd'):
    with open(path, 'r') as csdFile:
        code = csdFile.read()
    # Replace only the first occurrence of 'examples' (the directory name), so
    # that a file whose own name contains 'examples' keeps its name.
    xmlPath = path.replace('examples', 'examples-xml', 1) + '.xml'
    with open(xmlPath, 'w') as xmlFile:
        xmlFile.write('<refsect1>\n')
        xmlFile.write(highlight(code, CsoundDocumentLexer(), DocBookFormatter()))
        xmlFile.write('</refsect1>\n')

# table1.inc is highlighted with the orchestra lexer and written without a
# refsect1 wrapper.
path = 'examples/table1.inc'
with open(path, 'r') as incFile:
    code = incFile.read()
with open(path.replace('examples/', 'examples-xml/', 1) + '.xml', 'w') as xmlFile:
    xmlFile.write(highlight(code, CsoundOrchestraLexer(), DocBookFormatter()))