summaryrefslogtreecommitdiff
path: root/esxml.el
blob: e71ded2de5985fbd272ad086b0ce2b1a3f03f2c9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
;;; esxml.el --- Library for working with xml via esxml and sxml
;; Copyright (C) 2012

;; Author: Evan Izaksonas-Smith <izak0002 at umn dot edu>
;; Maintainer: Evan Izaksonas-Smith
;; Created: 15th August 2012
;; Version: 0.3.3
;; Keywords: tools, lisp, comm
;; Description: A library for easily generating XML/XHTML in elisp
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:

;; This is XML/XHTML done with S-Expressions in EmacsLisp.  Simply,
;; this is the easiest way to write HTML or XML in Lisp.
;;
;; This library uses the native form of XML representation as used by
;; many libraries already included within emacs.  This representation
;; will be referred to as "esxml" throughout this library.  See
;; `esxml-to-xml' for a concise description of the format.
;;
;; This library is not intended to be used directly by a user, though
;; it certainly could be.  It could be used to generate static html,
;; or use a library like `elnode' to serve dynamic pages.  Or even to
;; extract a form from a site to produce an API.
;;
;; TODO: Better documentation, more convenience.
;;
;; NOTICE: Code base will be transitioning to using pcase instead of
;; destructuring bind wherever possible.  If this leads to hard to
;; debug code, please let me know, and I will do whatever I can to
;; resolve these issues.
;;
;;; Code:
(require 'cl)
(require 'xml)
(require 'pcase)

(defun string-trim-whitespace (string)
  "A simple function, strips the whitespace from beginning and
end of the string.  Leaves all other whitespace untouched."
  (replace-regexp-in-string
   (rx string-start (* whitespace)
       (group (+? anything))
       (* whitespace) string-end)
   "\\1"
   string))

(defun esxml-trim-ws (esxml)
  "This may cause problems, is intended for parsing xml into sxml
but may eroneously delete desirable white space."
  (if (stringp esxml) (string-trim-whitespace esxml)
    (pcase-let ((`(,tag ,attrs . ,body) esxml))
      `(,tag ,attrs
             ,@(mapcar 'esxml-trim-ws body)))))

(defun attrp (attr)
  "Returns t if attr is a an esxml attribute.
An esxml attribute is a cons of the form (symbol . string)"
 (and (consp attr)
       (symbolp (car attr))
       (stringp (cdr attr))))

(defun esxml--convert-pair (attr)
  "Converts from cons cell to attribute pair.  Not intended for
general use."
  (pcase-let ((`(,car . ,cdr) attr))
    (check-type cdr string)
    (concat (symbol-name car)
            "="
            (prin1-to-string cdr))))

(defun attrsp (attrs)
    "Returns t if attrs is a list of esxml attributes.

See: `attrp'"
  (and (listp attrs)
       (every (lambda (attr)
                (and (consp attr)
                     (symbolp (car attr))
                     (stringp (cdr attr))))
              attrs)))

(defun esxml-validate-form (esxml)
  "A fast esxml validator.  Will error on invalid subparts making
it suitable for hindsight testing."
  (cond ((stringp esxml) nil)
        ((< (length esxml) 2)
         (error "%s is too short to be a valid esxml expression" esxml))
        (t (pcase-let ((`(,tag ,attrs . ,body) esxml))
             (check-type tag symbol)
             (check-type attrs attrs)
             (mapcar 'esxml-validate-form body)))))

;; While the following could certainly have been written using format,
;; concat makes them easier to read.  Update later if neccesary for
;; efficiency.

;; Though at first glance the recursive nature of this function might
;; give one pause, since xml is a recursive data type, a recursive
;; parser is an optimal strategy.  each node will be visited exactly
;; once during the transformation.
;;
;; Further, since a string is a terminal node and since xml can be
;; represented as a string, non dynamic portions of the page may be
;; precached quite easily.
(defun esxml--to-xml-recursive (esxml)
  (if (stringp esxml) esxml
    (pcase-let ((`(,tag ,attrs . ,body) esxml))
      ;; code goes here to catch invalid data.
      (concat "<" (symbol-name tag)
              (when attrs
                (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
              (if body
                  (concat ">" (mapconcat 'esxml--to-xml-recursive body "")
                          "</" (symbol-name tag) ">")
                "/>")))))

(defun esxml-to-xml (esxml)
  "This translates an esxml expression, i.e. that which is
returned by xml-parse-region.  The structure is defined as a
string or a list where the first element is the tag the second is
an alist of attribute value pairs and the remainder of the list
is 0 or more esxml elements.

 (TAG ATTRS &rest BODY) || STRING

TAG: is the tag and must be a symbol.

ATTRS: is an alist of attribute pairs each pair must be of the
       form (KEY . VALUE).

KEY: is the name of the attribute and must be a symbol.

VALUE: is the value of the attribute and must be a string.

BODY: is zero or more esxml expressions.  Having no body forms
      implies that the tag should be self closed.  If there is
      one or more body forms the tag will always be explicitly
      closed, even if they are the empty string.

STRING: if the esxml expression is a string it is returned
        unchanged, this allows for caching of any constant parts,
        such as headers and footers.
"
  (condition-case nil
      (esxml--to-xml-recursive esxml)
    (error (esxml-validate-form esxml))))

(defun pp-esxml-to-xml (esxml)
  "This translates an esxml expresion as `esxml-to-xml' but
indents it for ease of human readability, it is neccesarrily
slower and will produce longer output."
  (cond ((stringp esxml) esxml)
        ((and (listp esxml)
              (> (length esxml) 1))
         (pcase-let ((`(,tag ,attrs . ,body) esxml))
           (check-type tag symbol)
           (check-type attrs attrs)
           (concat "<" (symbol-name tag)
                   (when attrs
                     (concat " " (mapconcat 'esxml--convert-pair attrs " ")))
                   (if body
                       (concat ">" (if (every 'stringp body)
                                       (mapconcat 'identity body " ")
                                     (concat "\n"
                                             (replace-regexp-in-string
                                              "^" "  "
                                              (mapconcat 'pp-esxml-to-xml body "\n"))
                                             "\n"))
                               "</" (symbol-name tag) ">")
                     "/>"))))
        (t (error "%s is not a valid esxml expression" esxml))))

(defun sxml-to-esxml (sxml)
  "Translates sxml to esxml so the common standard can be used.
See: http://okmij.org/ftp/Scheme/SXML.html."
  (pcase sxml
    (`(,tag (@ . ,attrs) . ,body)
     `(,tag ,(mapcar (lambda (attr)
                       (cons (first attr)
                             (or (second attr)
                                 (prin1-to-string (first attr)))))
                     attrs)
            ,@(mapcar 'sxml-to-esxml body)))
    (`(,tag . ,body)
     `(,tag nil
            ,@(mapcar 'sxml-to-esxml body)))
    ((and sxml (pred stringp)) sxml)))

(defun sxml-to-xml (sxml)
  "Translates sxml to xml, via esxml, hey it's only a constant
factor. :)"
  (esxml-to-xml (sxml-to-esxml sxml)))



;; TODO: make agnostic with respect to libxml vs xml.el
(defun xml-to-esxml (string &optional trim)
  (with-temp-buffer
    (insert string)
    (let ((parse-tree (libxml-parse-xml-region (point-min)
                                               (point-max))))
      (if trim
          (esxml-trim-ws parse-tree)
        parse-tree))))

;; TODO, move to esxpath when mature
(defun esxml-get-by-key (esxml key value)
  "Returns a list of all elements whose wttribute KEY match
VALUE.  KEY should be a symbol, and VALUE should be a string.
Will not recurse below a match."
  (unless (stringp esxml)
    (pcase-let ((`(,tag ,attrs . ,body) esxml))
      (if (equal value
                 (assoc-default key attrs))
          (list esxml)
        (apply 'append (mapcar (lambda (sexp)
                                 (esxml-get-by-key sexp key value))
                               body))))))

(defun esxml-get-tags (esxml tags)
  "Returns a list of all elements whose tag is a member of TAGS.
TAGS should be a list of tags to be matched against. Will not
recurse below a match."
  (unless (stringp esxml)
    (pcase-let ((`(,tag ,attrs . ,body) esxml))
      (if (member tag tags)
          (list esxml)
        (apply 'append (mapcar (lambda (sexp)
                                 (esxml-get-tags sexp tags))
                               body))))))

(defun esxml-get-forms (esxml)
  "Returns a list of all forms."
  (esxml-get-tags esxml '(form)))

;; taken from kv
(defmacro esxml-destructuring-mapcar (args sexp seq)
  (declare (indent 2))
  (let ((entry (make-symbol)))
    `(mapcar (lambda (,entry)
               (destructuring-bind ,args ,entry ,sexp))
             ,seq)))

(provide 'esxml)
;;; esxml.el ends here