summaryrefslogtreecommitdiff
path: root/doc
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2017-11-12 21:48:47 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2017-11-12 21:48:47 -0800
commita89d4aa924e0515bf8860aaf30a7a50cf5a08e0c (patch)
tree25d02e1f58ccbaa65230a764fd39a06a96501c2f /doc
parentb85921259dd2fb40e442d5d38ac6a0c72bca7566 (diff)
lua-filters.md: add wordcount example.
Diffstat (limited to 'doc')
-rw-r--r--doc/lua-filters.md47
1 files changed, 47 insertions, 0 deletions
diff --git a/doc/lua-filters.md b/doc/lua-filters.md
index 5703a68ec..cee4240c7 100644
--- a/doc/lua-filters.md
+++ b/doc/lua-filters.md
@@ -371,6 +371,10 @@ at the "outer level" are included; this ignores blocks inside
nested constructs, like list items.)
``` lua
+-- creates a handout from an article, using its headings,
+-- blockquotes, numbered examples, figures, and any
+-- Divs with class "handout"
+
function Pandoc(doc)
local hblocks = {}
for i,el in pairs(doc.blocks) do
@@ -386,6 +390,49 @@ function Pandoc(doc)
end
```
+## Counting words in a document
+
+This filter counts the words in the body of a document (omitting
+metadata like titles and abstracts), including words in code.
+It should be more accurate than `wc -w` run directly on a
+Markdown document, since the latter will count markup
+characters, like the `#` in front of an ATX header, or
+tags in HTML documents, as words. To run it, use
+`pandoc --lua-filter wordcount.lua myfile.md`.
+
+``` lua
+-- counts words in a document
+
+-- Running total of words seen so far. Each handler in `wordcount`
+-- adds to it when it is called on an element.
+words = 0
+
+-- Adds the number of whitespace-separated tokens in `text` to `words`.
+-- Shared by the Code and CodeBlock handlers, which count identically.
+local function count_code_words(text)
+  -- gsub's second result is the number of matches, i.e. the token
+  -- count; both results are kept local so no globals are created.
+  local _, n = text:gsub("%S+", "")
+  words = words + n
+end
+
+-- Table of counting handlers, keyed by element name. The handlers
+-- return nothing; they only update `words`.
+wordcount = {
+  Str = function(el)
+    -- we don't count a word if it's entirely punctuation, so look
+    -- for at least one non-punctuation character:
+    if el.text:match("%P") then
+      words = words + 1
+    end
+  end,
+
+  Code = function(el)
+    count_code_words(el.text)
+  end,
+
+  CodeBlock = function(el)
+    count_code_words(el.text)
+  end
+}
+
+function Pandoc(el)
+  -- Only the body blocks are wrapped in a Div and walked, so the
+  -- document's metadata is never handed to the counting handlers.
+  local body = pandoc.Div(el.blocks)
+  pandoc.walk_block(body, wordcount)
+  local report = words .. " words in body"
+  print(report)
+  -- terminate the process with status 0 right after reporting
+  os.exit(0)
+end
+```
+
## Converting ABC code to music notation
This filter replaces code blocks with class `abc` with