diff options
author | Manoj Srivastava <srivasta@debian.org> | 2018-02-08 12:09:27 -0800 |
---|---|---|
committer | Manoj Srivastava <srivasta@debian.org> | 2018-02-08 12:09:27 -0800 |
commit | 7a39ce3d46f3d049ead08a455c5fcb1b37e153e3 (patch) | |
tree | 22dbef6e18ad76c91be20301ca2e5f9a39a98915 /doc | |
parent | 5372d368306be0259401d04dcca0172c33cf43b0 (diff) |
New upstream version 2.6.4
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile.in | 883 | ||||
-rw-r--r-- | doc/flex.1 | 163 | ||||
-rw-r--r-- | doc/flex.info | 281 | ||||
-rw-r--r-- | doc/flex.info-1 | 7791 | ||||
-rw-r--r-- | doc/flex.info-2 | bin | 0 -> 26966 bytes | |||
-rw-r--r-- | doc/stamp-vti | 4 | ||||
-rw-r--r-- | doc/version.texi | 4 |
7 files changed, 9126 insertions, 0 deletions
diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..5e65a31 --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,883 @@ +# Makefile.in generated by automake 1.15 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2014 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_cc_for_build.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/intlmacosx.m4 
$(top_srcdir)/m4/lib-ld.m4 \ + $(top_srcdir)/m4/lib-link.m4 $(top_srcdir)/m4/lib-prefix.m4 \ + $(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/version.texi \ + $(srcdir)/stamp-vti $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +AM_V_DVIPS = $(am__v_DVIPS_@AM_V@) +am__v_DVIPS_ = $(am__v_DVIPS_@AM_DEFAULT_V@) +am__v_DVIPS_0 = @echo " DVIPS " $@; +am__v_DVIPS_1 = +AM_V_MAKEINFO = $(am__v_MAKEINFO_@AM_V@) +am__v_MAKEINFO_ = $(am__v_MAKEINFO_@AM_DEFAULT_V@) +am__v_MAKEINFO_0 = @echo " MAKEINFO" $@; +am__v_MAKEINFO_1 = +AM_V_INFOHTML = $(am__v_INFOHTML_@AM_V@) +am__v_INFOHTML_ = $(am__v_INFOHTML_@AM_DEFAULT_V@) +am__v_INFOHTML_0 = @echo " INFOHTML" $@; +am__v_INFOHTML_1 = +AM_V_TEXI2DVI = $(am__v_TEXI2DVI_@AM_V@) +am__v_TEXI2DVI_ = $(am__v_TEXI2DVI_@AM_DEFAULT_V@) +am__v_TEXI2DVI_0 = @echo " TEXI2DVI" $@; +am__v_TEXI2DVI_1 = +AM_V_TEXI2PDF = $(am__v_TEXI2PDF_@AM_V@) +am__v_TEXI2PDF_ = $(am__v_TEXI2PDF_@AM_DEFAULT_V@) +am__v_TEXI2PDF_0 = @echo " TEXI2PDF" $@; +am__v_TEXI2PDF_1 = +AM_V_texinfo = $(am__v_texinfo_@AM_V@) +am__v_texinfo_ = $(am__v_texinfo_@AM_DEFAULT_V@) +am__v_texinfo_0 = -q +am__v_texinfo_1 = +AM_V_texidevnull = $(am__v_texidevnull_@AM_V@) +am__v_texidevnull_ = 
$(am__v_texidevnull_@AM_DEFAULT_V@) +am__v_texidevnull_0 = > /dev/null +am__v_texidevnull_1 = +INFO_DEPS = $(srcdir)/flex.info +TEXINFO_TEX = $(top_srcdir)/build-aux/texinfo.tex +am__TEXINFO_TEX_DIR = $(top_srcdir)/build-aux +DVIS = flex.dvi +PDFS = flex.pdf +PSS = flex.ps +HTMLS = flex.html +TEXINFOS = flex.texi +TEXI2PDF = $(TEXI2DVI) --pdf --batch +MAKEINFOHTML = $(MAKEINFO) --html +AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS) +DVIPS = dvips +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__installdirs = "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)" +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(dist_man_MANS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(dist_man_MANS) $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/mdate-sh \ + $(top_srcdir)/build-aux/texinfo.tex +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_EXEEXT = @BUILD_EXEEXT@ +BUILD_OBJEXT = @BUILD_OBJEXT@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_FOR_BUILD = @CC_FOR_BUILD@ +CFLAGS = @CFLAGS@ +CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CPPFLAGS_FOR_BUILD = @CPPFLAGS_FOR_BUILD@ +CPP_FOR_BUILD = @CPP_FOR_BUILD@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HELP2MAN = @HELP2MAN@ +INDENT = @INDENT@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LDFLAGS_FOR_BUILD = @LDFLAGS_FOR_BUILD@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBOBJS = @LIBOBJS@ +LIBPTHREAD = @LIBPTHREAD@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = 
@LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +M4 = @M4@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHARED_VERSION_INFO = @SHARED_VERSION_INFO@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TEXI2DVI = @TEXI2DVI@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WARNINGFLAGS = @WARNINGFLAGS@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +YACC = @YACC@ +YFLAGS = @YFLAGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CC_FOR_BUILD = @ac_ct_CC_FOR_BUILD@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = 
@localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +FLEX = $(top_builddir)/src/flex$(EXEEXT) +info_TEXINFOS = flex.texi +dist_man_MANS = flex.1 +MAINTAINERCLEANFILES = flex.1 +CLEANFILES = *.aux *.cp *.cps *.fn *.fns *.hk *.hks *.ky *.log \ + *.op *.ops *.pg *.toc *.tp *.tps *.vr *.vrs + +all: all-am + +.SUFFIXES: +.SUFFIXES: .dvi .html .info .pdf .ps .texi +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +.texi.info: + $(AM_V_MAKEINFO)restore=: && backupdir="$(am__leading_dot)am$$$$" && \ + am__cwd=`pwd` && $(am__cd) $(srcdir) && \ + rm -rf $$backupdir && mkdir $$backupdir && \ + if ($(MAKEINFO) --version) >/dev/null 2>&1; then \ + for f in $@ $@-[0-9] $@-[0-9][0-9] $(@:.info=).i[0-9] $(@:.info=).i[0-9][0-9]; do \ + if test -f $$f; then mv $$f $$backupdir; restore=mv; else :; fi; \ + done; \ + else :; fi && \ + cd "$$am__cwd"; \ + if $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $@ $<; \ + then \ + rc=0; \ + $(am__cd) $(srcdir); \ + else \ + rc=$$?; \ + $(am__cd) $(srcdir) && \ + $$restore $$backupdir/* `echo "./$@" | sed 's|[^/]*$$||'`; \ + fi; \ + rm -rf $$backupdir; exit $$rc + +.texi.dvi: + $(AM_V_TEXI2DVI)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2DVI) $(AM_V_texinfo) --build-dir=$(@:.dvi=.t2d) -o $@ $(AM_V_texidevnull) \ + $< + +.texi.pdf: + $(AM_V_TEXI2PDF)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + MAKEINFO='$(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir)' \ + $(TEXI2PDF) $(AM_V_texinfo) --build-dir=$(@:.pdf=.t2p) -o $@ $(AM_V_texidevnull) \ + $< + 
+.texi.html: + $(AM_V_MAKEINFO)rm -rf $(@:.html=.htp) + $(AM_V_at)if $(MAKEINFOHTML) $(AM_MAKEINFOHTMLFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) \ + -o $(@:.html=.htp) $<; \ + then \ + rm -rf $@ && mv $(@:.html=.htp) $@; \ + else \ + rm -rf $(@:.html=.htp); exit 1; \ + fi +$(srcdir)/flex.info: flex.texi $(srcdir)/version.texi +flex.dvi: flex.texi $(srcdir)/version.texi +flex.pdf: flex.texi $(srcdir)/version.texi +flex.html: flex.texi $(srcdir)/version.texi +$(srcdir)/version.texi: $(srcdir)/stamp-vti +$(srcdir)/stamp-vti: flex.texi $(top_srcdir)/configure + @(dir=.; test -f ./flex.texi || dir=$(srcdir); \ + set `$(SHELL) $(top_srcdir)/build-aux/mdate-sh $$dir/flex.texi`; \ + echo "@set UPDATED $$1 $$2 $$3"; \ + echo "@set UPDATED-MONTH $$2 $$3"; \ + echo "@set EDITION $(VERSION)"; \ + echo "@set VERSION $(VERSION)") > vti.tmp$$$$ && \ + (cmp -s vti.tmp$$$$ $(srcdir)/version.texi \ + || (echo "Updating $(srcdir)/version.texi" && \ + cp vti.tmp$$$$ $(srcdir)/version.texi.tmp$$$$ && \ + mv $(srcdir)/version.texi.tmp$$$$ $(srcdir)/version.texi)) && \ + rm -f vti.tmp$$$$ $(srcdir)/version.texi.$$$$ + @cp $(srcdir)/version.texi $@ + +mostlyclean-vti: + -rm -f vti.tmp* $(srcdir)/version.texi.tmp* + +maintainer-clean-vti: + -rm -f $(srcdir)/stamp-vti $(srcdir)/version.texi +.dvi.ps: + $(AM_V_DVIPS)TEXINPUTS="$(am__TEXINFO_TEX_DIR)$(PATH_SEPARATOR)$$TEXINPUTS" \ + $(DVIPS) $(AM_V_texinfo) -o $@ $< + +uninstall-dvi-am: + @$(NORMAL_UNINSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(dvidir)/$$f'"; \ + rm -f "$(DESTDIR)$(dvidir)/$$f"; \ + done + +uninstall-html-am: + @$(NORMAL_UNINSTALL) + @list='$(HTMLS)'; test -n "$(htmldir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -rf '$(DESTDIR)$(htmldir)/$$f'"; \ + rm -rf "$(DESTDIR)$(htmldir)/$$f"; \ + done + +uninstall-info-am: + @$(PRE_UNINSTALL) + @if test -d '$(DESTDIR)$(infodir)' && $(am__can_run_installinfo); then \ + 
list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' --remove '$(DESTDIR)$(infodir)/$$relfile'"; \ + if install-info --info-dir="$(DESTDIR)$(infodir)" --remove "$(DESTDIR)$(infodir)/$$relfile"; \ + then :; else test ! -f "$(DESTDIR)$(infodir)/$$relfile" || exit 1; fi; \ + done; \ + else :; fi + @$(NORMAL_UNINSTALL) + @list='$(INFO_DEPS)'; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + relfile_i=`echo "$$relfile" | sed 's|\.info$$||;s|$$|.i|'`; \ + (if test -d "$(DESTDIR)$(infodir)" && cd "$(DESTDIR)$(infodir)"; then \ + echo " cd '$(DESTDIR)$(infodir)' && rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]"; \ + rm -f $$relfile $$relfile-[0-9] $$relfile-[0-9][0-9] $$relfile_i[0-9] $$relfile_i[0-9][0-9]; \ + else :; fi); \ + done + +uninstall-pdf-am: + @$(NORMAL_UNINSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(pdfdir)/$$f'"; \ + rm -f "$(DESTDIR)$(pdfdir)/$$f"; \ + done + +uninstall-ps-am: + @$(NORMAL_UNINSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " rm -f '$(DESTDIR)$(psdir)/$$f'"; \ + rm -f "$(DESTDIR)$(psdir)/$$f"; \ + done + +dist-info: $(INFO_DEPS) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; \ + for base in $$list; do \ + case $$base in \ + $(srcdir)/*) base=`echo "$$base" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$base; then d=.; else d=$(srcdir); fi; \ + base_i=`echo "$$base" | sed 's|\.info$$||;s|$$|.i|'`; \ + for file in $$d/$$base $$d/$$base-[0-9] $$d/$$base-[0-9][0-9] $$d/$$base_i[0-9] $$d/$$base_i[0-9][0-9]; do \ + if test -f $$file; then \ + relfile=`expr "$$file" : "$$d/\(.*\)"`; \ + test -f "$(distdir)/$$relfile" || \ + cp -p $$file "$(distdir)/$$relfile"; \ + else :; fi; \ + done; \ + done + 
+mostlyclean-aminfo: + -rm -rf flex.t2d flex.t2p + +clean-aminfo: + -test -z "flex.dvi flex.pdf flex.ps flex.html" \ + || rm -rf flex.dvi flex.pdf flex.ps flex.html + +maintainer-clean-aminfo: + @list='$(INFO_DEPS)'; for i in $$list; do \ + i_i=`echo "$$i" | sed 's|\.info$$||;s|$$|.i|'`; \ + echo " rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]"; \ + rm -f $$i $$i-[0-9] $$i-[0-9][0-9] $$i_i[0-9] $$i_i[0-9][0-9]; \ + done +install-man1: $(dist_man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(dist_man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(dist_man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + 
dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$(top_distdir)" distdir="$(distdir)" \ + dist-info +check-am: all-am +check: check-am +all-am: Makefile $(INFO_DEPS) $(MANS) +installdirs: + for dir in "$(DESTDIR)$(infodir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-aminfo clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: $(DVIS) + +html: html-am + +html-am: $(HTMLS) + +info: info-am + +info-am: $(INFO_DEPS) + +install-data-am: install-info-am install-man + +install-dvi: install-dvi-am + +install-dvi-am: $(DVIS) + @$(NORMAL_INSTALL) + @list='$(DVIS)'; test -n "$(dvidir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(dvidir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(dvidir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(dvidir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(dvidir)" || exit $$?; \ + done +install-exec-am: + +install-html: install-html-am + +install-html-am: $(HTMLS) + @$(NORMAL_INSTALL) + @list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \ + $(am__strip_dir) \ + d2=$$d$$p; \ + if test -d "$$d2"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \ + $(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \ + echo " $(INSTALL_DATA) '$$d2'/* '$(DESTDIR)$(htmldir)/$$f'"; \ + $(INSTALL_DATA) "$$d2"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \ + else \ + list2="$$list2 $$d2"; \ + fi; \ + done; \ + test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(htmldir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(htmldir)" || exit $$?; \ + done; } +install-info: install-info-am + +install-info-am: $(INFO_DEPS) + 
@$(NORMAL_INSTALL) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(infodir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(infodir)" || exit 1; \ + fi; \ + for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + esac; \ + if test -f $$file; then d=.; else d=$(srcdir); fi; \ + file_i=`echo "$$file" | sed 's|\.info$$||;s|$$|.i|'`; \ + for ifile in $$d/$$file $$d/$$file-[0-9] $$d/$$file-[0-9][0-9] \ + $$d/$$file_i[0-9] $$d/$$file_i[0-9][0-9] ; do \ + if test -f $$ifile; then \ + echo "$$ifile"; \ + else : ; fi; \ + done; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done + @$(POST_INSTALL) + @if $(am__can_run_installinfo); then \ + list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \ + for file in $$list; do \ + relfile=`echo "$$file" | sed 's|^.*/||'`; \ + echo " install-info --info-dir='$(DESTDIR)$(infodir)' '$(DESTDIR)$(infodir)/$$relfile'";\ + install-info --info-dir="$(DESTDIR)$(infodir)" "$(DESTDIR)$(infodir)/$$relfile" || :;\ + done; \ + else : ; fi +install-man: install-man1 + +install-pdf: install-pdf-am + +install-pdf-am: $(PDFS) + @$(NORMAL_INSTALL) + @list='$(PDFS)'; test -n "$(pdfdir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pdfdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pdfdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(pdfdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(pdfdir)" || exit $$?; done +install-ps: install-ps-am + +install-ps-am: $(PSS) + @$(NORMAL_INSTALL) + @list='$(PSS)'; test -n "$(psdir)" || list=; \ + if test -n "$$list"; then \ + echo " 
$(MKDIR_P) '$(DESTDIR)$(psdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(psdir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(psdir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(psdir)" || exit $$?; done +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-vti + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-aminfo mostlyclean-generic \ + mostlyclean-libtool mostlyclean-vti + +pdf: pdf-am + +pdf-am: $(PDFS) + +ps: ps-am + +ps-am: $(PSS) + +uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-man uninstall-pdf-am uninstall-ps-am + +uninstall-man: uninstall-man1 + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-aminfo clean-generic \ + clean-libtool cscopelist-am ctags-am dist-info distclean \ + distclean-generic distclean-libtool distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-man1 install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-aminfo maintainer-clean-generic \ + maintainer-clean-vti mostlyclean mostlyclean-aminfo \ + mostlyclean-generic mostlyclean-libtool mostlyclean-vti pdf \ + pdf-am ps ps-am tags-am uninstall uninstall-am \ + uninstall-dvi-am uninstall-html-am uninstall-info-am \ + uninstall-man uninstall-man1 uninstall-pdf-am uninstall-ps-am + +.PRECIOUS: Makefile + + +flex.1: $(top_srcdir)/configure.ac $(top_srcdir)/src/flex.skl $(top_srcdir)/src/options.c $(top_srcdir)/src/options.h | $(FLEX) + 
$(HELP2MAN) --name='$(PACKAGE_NAME)' --section=1 \ + --source='The Flex Project' --manual='Programming' \ + --output=$@ $(FLEX) \ + || rm -f $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/flex.1 b/doc/flex.1 new file mode 100644 index 0000000..a1cd269 --- /dev/null +++ b/doc/flex.1 @@ -0,0 +1,163 @@ +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.3. +.TH FLEX "1" "May 2017" "The Flex Project" "Programming" +.SH NAME +flex \- the fast lexical analyser generator +.SH SYNOPSIS +.B flex +[\fI\,OPTIONS\/\fR] [\fI\,FILE\/\fR]... +.SH DESCRIPTION +Generates programs that perform pattern\-matching on text. +.SS "Table Compression:" +.TP +\fB\-Ca\fR, \fB\-\-align\fR +trade off larger tables for better memory alignment +.TP +\fB\-Ce\fR, \fB\-\-ecs\fR +construct equivalence classes +.TP +\fB\-Cf\fR +do not compress tables; use \fB\-f\fR representation +.TP +\fB\-CF\fR +do not compress tables; use \fB\-F\fR representation +.TP +\fB\-Cm\fR, \fB\-\-meta\-ecs\fR +construct meta\-equivalence classes +.TP +\fB\-Cr\fR, \fB\-\-read\fR +use read() instead of stdio for scanner input +.TP +\fB\-f\fR, \fB\-\-full\fR +generate fast, large scanner. Same as \fB\-Cfr\fR +.TP +\fB\-F\fR, \fB\-\-fast\fR +use alternate table representation. 
Same as \fB\-CFr\fR +.TP +\fB\-Cem\fR +default compression (same as \fB\-\-ecs\fR \fB\-\-meta\-ecs\fR) +.SS "Debugging:" +.TP +\fB\-d\fR, \fB\-\-debug\fR +enable debug mode in scanner +.TP +\fB\-b\fR, \fB\-\-backup\fR +write backing\-up information to lex.backup +.TP +\fB\-p\fR, \fB\-\-perf\-report\fR +write performance report to stderr +.TP +\fB\-s\fR, \fB\-\-nodefault\fR +suppress default rule to ECHO unmatched text +.TP +\fB\-T\fR, \fB\-\-trace\fR +flex should run in trace mode +.TP +\fB\-w\fR, \fB\-\-nowarn\fR +do not generate warnings +.TP +\fB\-v\fR, \fB\-\-verbose\fR +write summary of scanner statistics to stdout +.TP +\fB\-\-hex\fR +use hexadecimal numbers instead of octal in debug outputs +.SH FILES +.TP +\fB\-o\fR, \fB\-\-outfile\fR=\fI\,FILE\/\fR +specify output filename +.TP +\fB\-S\fR, \fB\-\-skel\fR=\fI\,FILE\/\fR +specify skeleton file +.TP +\fB\-t\fR, \fB\-\-stdout\fR +write scanner on stdout instead of lex.yy.c +.TP +\fB\-\-yyclass\fR=\fI\,NAME\/\fR +name of C++ class +.TP +\fB\-\-header\-file\fR=\fI\,FILE\/\fR +create a C header file in addition to the scanner +.HP +\fB\-\-tables\-file\fR[=\fI\,FILE\/\fR] write tables to FILE +.SS "Scanner behavior:" +.TP +\fB\-7\fR, \fB\-\-7bit\fR +generate 7\-bit scanner +.TP +\fB\-8\fR, \fB\-\-8bit\fR +generate 8\-bit scanner +.TP +\fB\-B\fR, \fB\-\-batch\fR +generate batch scanner (opposite of \fB\-I\fR) +.TP +\fB\-i\fR, \fB\-\-case\-insensitive\fR +ignore case in patterns +.TP +\fB\-l\fR, \fB\-\-lex\-compat\fR +maximal compatibility with original lex +.TP +\fB\-X\fR, \fB\-\-posix\-compat\fR +maximal compatibility with POSIX lex +.TP +\fB\-I\fR, \fB\-\-interactive\fR +generate interactive scanner (opposite of \fB\-B\fR) +.TP +\fB\-\-yylineno\fR +track line count in yylineno +.SS "Generated code:" +.TP +\-+, \fB\-\-c\fR++ +generate C++ scanner class +.TP +\fB\-Dmacro\fR[=\fI\,defn\/\fR] +#define macro defn (default defn is '1') +.TP +\fB\-L\fR, \fB\-\-noline\fR +suppress #line directives in scanner +.TP 
+\fB\-P\fR, \fB\-\-prefix\fR=\fI\,STRING\/\fR +use STRING as prefix instead of "yy" +.TP +\fB\-R\fR, \fB\-\-reentrant\fR +generate a reentrant C scanner +.TP +\fB\-\-bison\-bridge\fR +scanner for bison pure parser. +.TP +\fB\-\-bison\-locations\fR +include yylloc support. +.TP +\fB\-\-stdinit\fR +initialize yyin/yyout to stdin/stdout +.TP +\fB\-\-nounistd\fR +do not include <unistd.h> +.TP +\fB\-\-noFUNCTION\fR +do not generate a particular FUNCTION +.SS "Miscellaneous:" +.TP +\fB\-c\fR +do\-nothing POSIX option +.TP +\fB\-n\fR +do\-nothing POSIX option +.HP +\-? +.TP +\fB\-h\fR, \fB\-\-help\fR +produce this help message +.TP +\fB\-V\fR, \fB\-\-version\fR +report flex version +.SH "SEE ALSO" +The full documentation for +.B flex +is maintained as a Texinfo manual. If the +.B info +and +.B flex +programs are properly installed at your site, the command +.IP +.B info flex +.PP +should give you access to the complete manual. diff --git a/doc/flex.info b/doc/flex.info new file mode 100644 index 0000000..a972bcd --- /dev/null +++ b/doc/flex.info @@ -0,0 +1,281 @@ +This is flex.info, produced by makeinfo version 6.1 from flex.texi. + +The flex manual is placed under the same licensing conditions as the +rest of flex: + + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2012 The Flex +Project. + + Copyright (C) 1990, 1997 The Regents of the University of California. +All rights reserved. + + This code is derived from software contributed to Berkeley by Vern +Paxson. + + The United States Government has rights in this work pursuant to +contract no. DE-AC03-76SF00098 between the United States Department of +Energy and the University of California. + + Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + Neither the name of the University nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. +INFO-DIR-SECTION Programming +START-INFO-DIR-ENTRY +* flex: (flex). Fast lexical analyzer generator (lex replacement). +END-INFO-DIR-ENTRY + + +Indirect: +flex.info-1: 1622 +flex.info-2: 318745 + +Tag Table: +(Indirect) +Node: Top1622 +Node: Copyright9414 +Node: Reporting Bugs10933 +Node: Introduction11189 +Node: Simple Examples12018 +Node: Format15304 +Node: Definitions Section15759 +Ref: Definitions Section-Footnote-118017 +Node: Rules Section18085 +Node: User Code Section19243 +Node: Comments in the Input19681 +Node: Patterns21051 +Ref: case and character ranges27883 +Node: Matching31886 +Node: Actions35171 +Node: Generated Scanner44133 +Node: Start Conditions49136 +Node: Multiple Input Buffers59678 +Ref: Scanning Strings66221 +Node: EOF67850 +Node: Misc Macros69436 +Node: User Values72290 +Node: Yacc74615 +Node: Scanner Options75510 +Node: Options for Specifying Filenames78299 +Ref: option-header78525 +Ref: option-outfile79239 +Ref: option-stdout79564 +Node: Options Affecting Scanner Behavior80547 +Ref: option-case-insensitive80788 +Ref: option-lex-compat81221 +Ref: option-batch81753 +Ref: option-interactive82272 +Ref: option-7bit83626 +Ref: option-8bit84930 +Ref: option-default85342 +Ref: option-always-interactive85406 +Ref: option-posix86010 +Ref: option-stack87157 +Ref: option-stdinit87265 +Ref: option-yylineno87744 +Ref: option-yywrap88187 +Node: Code-Level And API 
Options88454 +Ref: option-ansi-definitions88681 +Ref: option-ansi-prototypes88756 +Ref: option-bison-bridge88829 +Ref: option-bison-locations89170 +Ref: option-noline89430 +Ref: option-reentrant89944 +Ref: option-c++90556 +Ref: option-array90682 +Ref: option-pointer90780 +Ref: option-prefix90907 +Ref: option-main92435 +Ref: option-nounistd92619 +Ref: option-yyclass93130 +Node: Options for Scanner Speed and Size93614 +Ref: option-align94164 +Ref: option-ecs94666 +Ref: option-meta-ecs95705 +Ref: option-read96193 +Ref: option-full98076 +Ref: option-fast98271 +Node: Debugging Options99195 +Ref: option-backup99382 +Ref: option-debug99927 +Ref: option-perf-report100649 +Ref: option-nodefault101275 +Ref: option-trace101593 +Ref: option-nowarn101884 +Ref: option-verbose101952 +Ref: option-warn102381 +Node: Miscellaneous Options102600 +Node: Performance103056 +Node: Cxx113303 +Node: Reentrant121395 +Node: Reentrant Uses122129 +Node: Reentrant Overview123691 +Node: Reentrant Example124491 +Node: Reentrant Detail125264 +Node: Specify Reentrant125768 +Node: Extra Reentrant Argument126418 +Node: Global Replacement127670 +Node: Init and Destroy Functions128905 +Node: Accessor Methods131426 +Node: Extra Data132773 +Node: About yyscan_t135040 +Node: Reentrant Functions135437 +Ref: bison-functions136921 +Node: Lex and Posix137660 +Node: Memory Management145007 +Ref: memory-management145153 +Node: The Default Memory Management145387 +Ref: The Default Memory Management-Footnote-1149207 +Node: Overriding The Default Memory Management149360 +Ref: Overriding The Default Memory Management-Footnote-1151774 +Node: A Note About yytext And Memory151938 +Node: Serialized Tables153178 +Ref: serialization153322 +Node: Creating Serialized Tables154102 +Node: Loading and Unloading Serialized Tables155717 +Node: Tables File Format157490 +Node: Diagnostics164515 +Node: Limitations167924 +Node: Bibliography169872 +Node: FAQ170542 +Node: When was flex born?175705 +Node: How do I expand 
backslash-escape sequences in C-style quoted strings?176082 +Node: Why do flex scanners call fileno if it is not ANSI compatible?177385 +Node: Does flex support recursive pattern definitions?178182 +Node: How do I skip huge chunks of input (tens of megabytes) while using flex?179029 +Node: Flex is not matching my patterns in the same order that I defined them.179496 +Node: My actions are executing out of order or sometimes not at all.181242 +Node: How can I have multiple input sources feed into the same scanner at the same time?182015 +Node: Can I build nested parsers that work with the same input file?184000 +Node: How can I match text only at the end of a file?185007 +Node: How can I make REJECT cascade across start condition boundaries?185811 +Node: Why cant I use fast or full tables with interactive mode?186825 +Node: How much faster is -F or -f than -C?188082 +Node: If I have a simple grammar cant I just parse it with flex?188394 +Node: Why doesn't yyrestart() set the start state back to INITIAL?188876 +Node: How can I match C-style comments?189503 +Node: The period isn't working the way I expected.190313 +Node: Can I get the flex manual in another format?191558 +Node: Does there exist a "faster" NDFA->DFA algorithm?192048 +Node: How does flex compile the DFA so quickly?192558 +Node: How can I use more than 8192 rules?193524 +Node: How do I abandon a file in the middle of a scan and switch to a new file?194934 +Node: How do I execute code only during initialization (only before the first scan)?195488 +Node: How do I execute code at termination?196265 +Node: Where else can I find help?196591 +Node: Can I include comments in the "rules" section of the file?196965 +Node: I get an error about undefined yywrap().197345 +Node: How can I change the matching pattern at run time?197821 +Node: How can I expand macros in the input?198183 +Node: How can I build a two-pass scanner?199215 +Node: How do I match any string not matched in the preceding rules?200133 +Node: I am 
trying to port code from AT&T lex that uses yysptr and yysbuf.201042 +Node: Is there a way to make flex treat NULL like a regular character?201837 +Node: Whenever flex can not match the input it says "flex scanner jammed".202357 +Node: Why doesn't flex have non-greedy operators like perl does?203000 +Node: Memory leak - 16386 bytes allocated by malloc.204353 +Ref: faq-memory-leak204651 +Node: How do I track the byte offset for lseek()?205622 +Node: How do I use my own I/O classes in a C++ scanner?207131 +Node: How do I skip as many chars as possible?207974 +Node: deleteme00209049 +Node: Are certain equivalent patterns faster than others?209489 +Node: Is backing up a big deal?212907 +Node: Can I fake multi-byte character support?214813 +Node: deleteme01216254 +Node: Can you discuss some flex internals?217363 +Node: unput() messes up yy_at_bol219607 +Node: The | operator is not doing what I want220709 +Node: Why can't flex understand this variable trailing context pattern?222255 +Node: The ^ operator isn't working223504 +Node: Trailing context is getting confused with trailing optional patterns224739 +Node: Is flex GNU or not?225982 +Node: ERASEME53227655 +Node: I need to scan if-then-else blocks and while loops228425 +Node: ERASEME55229624 +Node: ERASEME56230722 +Node: ERASEME57232080 +Node: Is there a repository for flex scanners?233078 +Node: How can I conditionally compile or preprocess my flex input file?233394 +Node: Where can I find grammars for lex and yacc?233867 +Node: I get an end-of-buffer message for each character scanned.234214 +Node: unnamed-faq-62234809 +Node: unnamed-faq-63235827 +Node: unnamed-faq-64237124 +Node: unnamed-faq-65238090 +Node: unnamed-faq-66238876 +Node: unnamed-faq-67239991 +Node: unnamed-faq-68240978 +Node: unnamed-faq-69242120 +Node: unnamed-faq-70242833 +Node: unnamed-faq-71243594 +Node: unnamed-faq-72244803 +Node: unnamed-faq-73245846 +Node: unnamed-faq-74246770 +Node: unnamed-faq-75247715 +Node: unnamed-faq-76248847 +Node: 
unnamed-faq-77249553 +Node: unnamed-faq-78250446 +Node: unnamed-faq-79251444 +Node: unnamed-faq-80253144 +Node: unnamed-faq-81254462 +Node: unnamed-faq-82257262 +Node: unnamed-faq-83258219 +Node: unnamed-faq-84259999 +Node: unnamed-faq-85261102 +Node: unnamed-faq-86262109 +Node: unnamed-faq-87263047 +Node: unnamed-faq-88263693 +Node: unnamed-faq-90264524 +Node: unnamed-faq-91265787 +Node: unnamed-faq-92268215 +Node: unnamed-faq-93268714 +Node: unnamed-faq-94269641 +Node: unnamed-faq-95271053 +Node: unnamed-faq-96272571 +Node: unnamed-faq-97273330 +Node: unnamed-faq-98273997 +Node: unnamed-faq-99274662 +Node: unnamed-faq-100275591 +Node: unnamed-faq-101276301 +Node: What is the difference between YYLEX_PARAM and YY_DECL?277114 +Node: Why do I get "conflicting types for yylex" error?277638 +Node: How do I access the values set in a Flex action from within a Bison action?278168 +Node: Appendices278597 +Node: Makefiles and Flex278862 +Ref: Makefiles and Flex-Footnote-1282064 +Ref: Makefiles and Flex-Footnote-2282181 +Ref: Makefiles and Flex-Footnote-3282368 +Node: Bison Bridge282419 +Ref: Bison Bridge-Footnote-1285086 +Node: M4 Dependency285278 +Ref: M4 Dependency-Footnote-1286692 +Node: Common Patterns286828 +Node: Numbers287151 +Node: Identifiers288127 +Node: Quoted Constructs288954 +Node: Addresses290008 +Node: Indices291320 +Node: Concept Index291612 +Node: Index of Functions and Macros318745 +Node: Index of Variables323714 +Node: Index of Data Types325380 +Node: Index of Hooks326268 +Node: Index of Scanner Options326836 + +End Tag Table diff --git a/doc/flex.info-1 b/doc/flex.info-1 new file mode 100644 index 0000000..b79df5e --- /dev/null +++ b/doc/flex.info-1 @@ -0,0 +1,7791 @@ +This is flex.info, produced by makeinfo version 6.1 from flex.texi. + +The flex manual is placed under the same licensing conditions as the +rest of flex: + + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2012 The Flex +Project. 
+ + Copyright (C) 1990, 1997 The Regents of the University of California. +All rights reserved. + + This code is derived from software contributed to Berkeley by Vern +Paxson. + + The United States Government has rights in this work pursuant to +contract no. DE-AC03-76SF00098 between the United States Department of +Energy and the University of California. + + Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + Neither the name of the University nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. +INFO-DIR-SECTION Programming +START-INFO-DIR-ENTRY +* flex: (flex). Fast lexical analyzer generator (lex replacement). +END-INFO-DIR-ENTRY + + +File: flex.info, Node: Top, Next: Copyright, Prev: (dir), Up: (dir) + +flex +**** + +This manual describes 'flex', a tool for generating programs that +perform pattern-matching on text. The manual includes both tutorial and +reference sections. + + This edition of 'The flex Manual' documents 'flex' version 2.6.4. It +was last updated on 6 May 2017. + + This manual was written by Vern Paxson, Will Estes and John Millaway. 
+ +* Menu: + +* Copyright:: +* Reporting Bugs:: +* Introduction:: +* Simple Examples:: +* Format:: +* Patterns:: +* Matching:: +* Actions:: +* Generated Scanner:: +* Start Conditions:: +* Multiple Input Buffers:: +* EOF:: +* Misc Macros:: +* User Values:: +* Yacc:: +* Scanner Options:: +* Performance:: +* Cxx:: +* Reentrant:: +* Lex and Posix:: +* Memory Management:: +* Serialized Tables:: +* Diagnostics:: +* Limitations:: +* Bibliography:: +* FAQ:: +* Appendices:: +* Indices:: + + -- The Detailed Node Listing -- + +Format of the Input File + +* Definitions Section:: +* Rules Section:: +* User Code Section:: +* Comments in the Input:: + +Scanner Options + +* Options for Specifying Filenames:: +* Options Affecting Scanner Behavior:: +* Code-Level And API Options:: +* Options for Scanner Speed and Size:: +* Debugging Options:: +* Miscellaneous Options:: + +Reentrant C Scanners + +* Reentrant Uses:: +* Reentrant Overview:: +* Reentrant Example:: +* Reentrant Detail:: +* Reentrant Functions:: + +The Reentrant API in Detail + +* Specify Reentrant:: +* Extra Reentrant Argument:: +* Global Replacement:: +* Init and Destroy Functions:: +* Accessor Methods:: +* Extra Data:: +* About yyscan_t:: + +Memory Management + +* The Default Memory Management:: +* Overriding The Default Memory Management:: +* A Note About yytext And Memory:: + +Serialized Tables + +* Creating Serialized Tables:: +* Loading and Unloading Serialized Tables:: +* Tables File Format:: + +FAQ + +* When was flex born?:: +* How do I expand backslash-escape sequences in C-style quoted strings?:: +* Why do flex scanners call fileno if it is not ANSI compatible?:: +* Does flex support recursive pattern definitions?:: +* How do I skip huge chunks of input (tens of megabytes) while using flex?:: +* Flex is not matching my patterns in the same order that I defined them.:: +* My actions are executing out of order or sometimes not at all.:: +* How can I have multiple input sources feed into the same scanner at the 
same time?:: +* Can I build nested parsers that work with the same input file?:: +* How can I match text only at the end of a file?:: +* How can I make REJECT cascade across start condition boundaries?:: +* Why cant I use fast or full tables with interactive mode?:: +* How much faster is -F or -f than -C?:: +* If I have a simple grammar cant I just parse it with flex?:: +* Why doesn't yyrestart() set the start state back to INITIAL?:: +* How can I match C-style comments?:: +* The period isn't working the way I expected.:: +* Can I get the flex manual in another format?:: +* Does there exist a "faster" NDFA->DFA algorithm?:: +* How does flex compile the DFA so quickly?:: +* How can I use more than 8192 rules?:: +* How do I abandon a file in the middle of a scan and switch to a new file?:: +* How do I execute code only during initialization (only before the first scan)?:: +* How do I execute code at termination?:: +* Where else can I find help?:: +* Can I include comments in the "rules" section of the file?:: +* I get an error about undefined yywrap().:: +* How can I change the matching pattern at run time?:: +* How can I expand macros in the input?:: +* How can I build a two-pass scanner?:: +* How do I match any string not matched in the preceding rules?:: +* I am trying to port code from AT&T lex that uses yysptr and yysbuf.:: +* Is there a way to make flex treat NULL like a regular character?:: +* Whenever flex can not match the input it says "flex scanner jammed".:: +* Why doesn't flex have non-greedy operators like perl does?:: +* Memory leak - 16386 bytes allocated by malloc.:: +* How do I track the byte offset for lseek()?:: +* How do I use my own I/O classes in a C++ scanner?:: +* How do I skip as many chars as possible?:: +* deleteme00:: +* Are certain equivalent patterns faster than others?:: +* Is backing up a big deal?:: +* Can I fake multi-byte character support?:: +* deleteme01:: +* Can you discuss some flex internals?:: +* unput() messes up yy_at_bol:: 
+* The | operator is not doing what I want:: +* Why can't flex understand this variable trailing context pattern?:: +* The ^ operator isn't working:: +* Trailing context is getting confused with trailing optional patterns:: +* Is flex GNU or not?:: +* ERASEME53:: +* I need to scan if-then-else blocks and while loops:: +* ERASEME55:: +* ERASEME56:: +* ERASEME57:: +* Is there a repository for flex scanners?:: +* How can I conditionally compile or preprocess my flex input file?:: +* Where can I find grammars for lex and yacc?:: +* I get an end-of-buffer message for each character scanned.:: +* unnamed-faq-62:: +* unnamed-faq-63:: +* unnamed-faq-64:: +* unnamed-faq-65:: +* unnamed-faq-66:: +* unnamed-faq-67:: +* unnamed-faq-68:: +* unnamed-faq-69:: +* unnamed-faq-70:: +* unnamed-faq-71:: +* unnamed-faq-72:: +* unnamed-faq-73:: +* unnamed-faq-74:: +* unnamed-faq-75:: +* unnamed-faq-76:: +* unnamed-faq-77:: +* unnamed-faq-78:: +* unnamed-faq-79:: +* unnamed-faq-80:: +* unnamed-faq-81:: +* unnamed-faq-82:: +* unnamed-faq-83:: +* unnamed-faq-84:: +* unnamed-faq-85:: +* unnamed-faq-86:: +* unnamed-faq-87:: +* unnamed-faq-88:: +* unnamed-faq-90:: +* unnamed-faq-91:: +* unnamed-faq-92:: +* unnamed-faq-93:: +* unnamed-faq-94:: +* unnamed-faq-95:: +* unnamed-faq-96:: +* unnamed-faq-97:: +* unnamed-faq-98:: +* unnamed-faq-99:: +* unnamed-faq-100:: +* unnamed-faq-101:: +* What is the difference between YYLEX_PARAM and YY_DECL?:: +* Why do I get "conflicting types for yylex" error?:: +* How do I access the values set in a Flex action from within a Bison action?:: + +Appendices + +* Makefiles and Flex:: +* Bison Bridge:: +* M4 Dependency:: +* Common Patterns:: + +Indices + +* Concept Index:: +* Index of Functions and Macros:: +* Index of Variables:: +* Index of Data Types:: +* Index of Hooks:: +* Index of Scanner Options:: + + + +File: flex.info, Node: Copyright, Next: Reporting Bugs, Prev: Top, Up: Top + +1 Copyright +*********** + +The flex manual is placed under the same 
licensing conditions as the +rest of flex: + + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2012 The Flex +Project. + + Copyright (C) 1990, 1997 The Regents of the University of California. +All rights reserved. + + This code is derived from software contributed to Berkeley by Vern +Paxson. + + The United States Government has rights in this work pursuant to +contract no. DE-AC03-76SF00098 between the United States Department of +Energy and the University of California. + + Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + + Neither the name of the University nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + + THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + + +File: flex.info, Node: Reporting Bugs, Next: Introduction, Prev: Copyright, Up: Top + +2 Reporting Bugs +**************** + +If you find a bug in 'flex', please report it using GitHub's issue +tracking facility at <https://github.com/westes/flex/issues/> + + +File: flex.info, Node: Introduction, Next: Simple Examples, Prev: Reporting Bugs, Up: Top + +3 Introduction +************** + +'flex' is a tool for generating "scanners". A scanner is a program +which recognizes lexical patterns in text. 
The 'flex' program reads the +given input files, or its standard input if no file names are given, for +a description of a scanner to generate. The description is in the form +of pairs of regular expressions and C code, called "rules". 'flex' +generates as output a C source file, 'lex.yy.c' by default, which +defines a routine 'yylex()'. This file can be compiled and linked with +the flex runtime library to produce an executable. When the executable +is run, it analyzes its input for occurrences of the regular +expressions. Whenever it finds one, it executes the corresponding C +code. + + +File: flex.info, Node: Simple Examples, Next: Format, Prev: Introduction, Up: Top + +4 Some Simple Examples +********************** + +First some simple examples to get the flavor of how one uses 'flex'. + + The following 'flex' input specifies a scanner which, when it +encounters the string 'username' will replace it with the user's login +name: + + %% + username printf( "%s", getlogin() ); + + By default, any text not matched by a 'flex' scanner is copied to the +output, so the net effect of this scanner is to copy its input file to +its output with each occurrence of 'username' expanded. In this input, +there is just one rule. 'username' is the "pattern" and the 'printf' is +the "action". The '%%' symbol marks the beginning of the rules. + + Here's another simple example: + + int num_lines = 0, num_chars = 0; + + %% + \n ++num_lines; ++num_chars; + . ++num_chars; + + %% + + int main() + { + yylex(); + printf( "# of lines = %d, # of chars = %d\n", + num_lines, num_chars ); + } + + This scanner counts the number of characters and the number of lines +in its input. It produces no output other than the final report on the +character and line counts. The first line declares two globals, +'num_lines' and 'num_chars', which are accessible both inside 'yylex()' +and in the 'main()' routine declared after the second '%%'. 
There are +two rules, one which matches a newline ('\n') and increments both the +line count and the character count, and one which matches any character +other than a newline (indicated by the '.' regular expression). + + A somewhat more complicated example: + + /* scanner for a toy Pascal-like language */ + + %{ + /* need this for the call to atof() below */ + #include <math.h> + %} + + DIGIT [0-9] + ID [a-z][a-z0-9]* + + %% + + {DIGIT}+ { + printf( "An integer: %s (%d)\n", yytext, + atoi( yytext ) ); + } + + {DIGIT}+"."{DIGIT}* { + printf( "A float: %s (%g)\n", yytext, + atof( yytext ) ); + } + + if|then|begin|end|procedure|function { + printf( "A keyword: %s\n", yytext ); + } + + {ID} printf( "An identifier: %s\n", yytext ); + + "+"|"-"|"*"|"/" printf( "An operator: %s\n", yytext ); + + "{"[^{}\n]*"}" /* eat up one-line comments */ + + [ \t\n]+ /* eat up whitespace */ + + . printf( "Unrecognized character: %s\n", yytext ); + + %% + + int main( int argc, char **argv ) + { + ++argv, --argc; /* skip over program name */ + if ( argc > 0 ) + yyin = fopen( argv[0], "r" ); + else + yyin = stdin; + + yylex(); + } + + This is the beginnings of a simple scanner for a language like +Pascal. It identifies different types of "tokens" and reports on what +it has seen. + + The details of this example will be explained in the following +sections. + + +File: flex.info, Node: Format, Next: Patterns, Prev: Simple Examples, Up: Top + +5 Format of the Input File +************************** + +The 'flex' input file consists of three sections, separated by a line +containing only '%%'. 
+ + definitions + %% + rules + %% + user code + +* Menu: + +* Definitions Section:: +* Rules Section:: +* User Code Section:: +* Comments in the Input:: + + +File: flex.info, Node: Definitions Section, Next: Rules Section, Prev: Format, Up: Format + +5.1 Format of the Definitions Section +===================================== + +The "definitions section" contains declarations of simple "name" +definitions to simplify the scanner specification, and declarations of +"start conditions", which are explained in a later section. + + Name definitions have the form: + + name definition + + The 'name' is a word beginning with a letter or an underscore ('_') +followed by zero or more letters, digits, '_', or '-' (dash). The +definition is taken to begin at the first non-whitespace character +following the name and continuing to the end of the line. The +definition can subsequently be referred to using '{name}', which will +expand to '(definition)'. For example, + + DIGIT [0-9] + ID [a-z][a-z0-9]* + + Defines 'DIGIT' to be a regular expression which matches a single +digit, and 'ID' to be a regular expression which matches a letter +followed by zero-or-more letters-or-digits. A subsequent reference to + + {DIGIT}+"."{DIGIT}* + + is identical to + + ([0-9])+"."([0-9])* + + and matches one-or-more digits followed by a '.' followed by +zero-or-more digits. + + An unindented comment (i.e., a line beginning with '/*') is copied +verbatim to the output up to the next '*/'. + + Any _indented_ text or text enclosed in '%{' and '%}' is also copied +verbatim to the output (with the %{ and %} symbols removed). The %{ and +%} symbols must appear unindented on lines by themselves. + + A '%top' block is similar to a '%{' ... '%}' block, except that the +code in a '%top' block is relocated to the _top_ of the generated file, +before any flex definitions (1). 
The '%top' block is useful when you +want certain preprocessor macros to be defined or certain files to be +included before the generated code. The single characters, '{' and '}' +are used to delimit the '%top' block, as shown in the example below: + + %top{ + /* This code goes at the "top" of the generated file. */ + #include <stdint.h> + #include <inttypes.h> + } + + Multiple '%top' blocks are allowed, and their order is preserved. + + ---------- Footnotes ---------- + + (1) Actually, 'yyIN_HEADER' is defined before the '%top' block. + + +File: flex.info, Node: Rules Section, Next: User Code Section, Prev: Definitions Section, Up: Format + +5.2 Format of the Rules Section +=============================== + +The "rules" section of the 'flex' input contains a series of rules of +the form: + + pattern action + + where the pattern must be unindented and the action must begin on the +same line. *Note Patterns::, for a further description of patterns and +actions. + + In the rules section, any indented or %{ %} enclosed text appearing +before the first rule may be used to declare variables which are local +to the scanning routine and (after the declarations) code which is to be +executed whenever the scanning routine is entered. Other indented or %{ +%} text in the rule section is still copied to the output, but its +meaning is not well-defined and it may well cause compile-time errors +(this feature is present for POSIX compliance. *Note Lex and Posix::, +for other such features). + + Any _indented_ text or text enclosed in '%{' and '%}' is copied +verbatim to the output (with the %{ and %} symbols removed). The %{ and +%} symbols must appear unindented on lines by themselves. + + +File: flex.info, Node: User Code Section, Next: Comments in the Input, Prev: Rules Section, Up: Format + +5.3 Format of the User Code Section +=================================== + +The user code section is simply copied to 'lex.yy.c' verbatim.
It is +used for companion routines which call or are called by the scanner. +The presence of this section is optional; if it is missing, the second +'%%' in the input file may be skipped, too. + + +File: flex.info, Node: Comments in the Input, Prev: User Code Section, Up: Format + +5.4 Comments in the Input +========================= + +Flex supports C-style comments, that is, anything between '/*' and '*/' +is considered a comment. Whenever flex encounters a comment, it copies +the entire comment verbatim to the generated source code. Comments may +appear just about anywhere, but with the following exceptions: + + * Comments may not appear in the Rules Section wherever flex is + expecting a regular expression. This means comments may not appear + at the beginning of a line, or immediately following a list of + scanner states. + * Comments may not appear on an '%option' line in the Definitions + Section. + + If you want to follow a simple rule, then always begin a comment on a +new line, with one or more whitespace characters before the initial +'/*'. This rule will work anywhere in the input file. + + All the comments in the following example are valid: + + %{ + /* code block */ + %} + + /* Definitions Section */ + %x STATE_X + + %% + /* Rules Section */ + ruleA /* after regex */ { /* code block */ } /* after code block */ + /* Rules Section (indented) */ + <STATE_X>{ + ruleC ECHO; + ruleD ECHO; + %{ + /* code block */ + %} + } + %% + /* User Code Section */ + + + +File: flex.info, Node: Patterns, Next: Matching, Prev: Format, Up: Top + +6 Patterns +********** + +The patterns in the input (see *note Rules Section::) are written using +an extended set of regular expressions. These are: + +'x' + match the character 'x' + +'.'
+ any character (byte) except newline + +'[xyz]' + a "character class"; in this case, the pattern matches either an + 'x', a 'y', or a 'z' + +'[abj-oZ]' + a "character class" with a range in it; matches an 'a', a 'b', any + letter from 'j' through 'o', or a 'Z' + +'[^A-Z]' + a "negated character class", i.e., any character but those in the + class. In this case, any character EXCEPT an uppercase letter. + +'[^A-Z\n]' + any character EXCEPT an uppercase letter or a newline + +'[a-z]{-}[aeiou]' + the lowercase consonants + +'r*' + zero or more r's, where r is any regular expression + +'r+' + one or more r's + +'r?' + zero or one r's (that is, "an optional r") + +'r{2,5}' + anywhere from two to five r's + +'r{2,}' + two or more r's + +'r{4}' + exactly 4 r's + +'{name}' + the expansion of the 'name' definition (*note Format::). + +'"[xyz]\"foo"' + the literal string: '[xyz]"foo' + +'\X' + if X is 'a', 'b', 'f', 'n', 'r', 't', or 'v', then the ANSI-C + interpretation of '\x'. Otherwise, a literal 'X' (used to escape + operators such as '*') + +'\0' + a NUL character (ASCII code 0) + +'\123' + the character with octal value 123 + +'\x2a' + the character with hexadecimal value 2a + +'(r)' + match an 'r'; parentheses are used to override precedence (see + below) + +'(?r-s:pattern)' + apply option 'r' and omit option 's' while interpreting pattern. + Options may be zero or more of the characters 'i', 's', or 'x'. + + 'i' means case-insensitive. '-i' means case-sensitive. + + 's' alters the meaning of the '.' syntax to match any single byte + whatsoever. '-s' alters the meaning of '.' to match any byte + except '\n'. + + 'x' ignores comments and whitespace in patterns. Whitespace is + ignored unless it is backslash-escaped, contained within '""'s, or + appears inside a character class. + + The following are all valid: + + (?:foo) same as (foo) + (?i:ab7) same as ([aA][bB]7) + (?-i:ab) same as (ab) + (?s:.) same as [\x00-\xFF] + (?-s:.) same as [^\n] + (?ix-s: a . 
b) same as ([Aa][^\n][bB]) + (?x:a b) same as ("ab") + (?x:a\ b) same as ("a b") + (?x:a" "b) same as ("a b") + (?x:a[ ]b) same as ("a b") + (?x:a + /* comment */ + b + c) same as (abc) + +'(?# comment )' + omit everything within '()'. The first ')' character encountered + ends the pattern. It is not possible for the comment to contain + a ')' character. The comment may span lines. + +'rs' + the regular expression 'r' followed by the regular expression 's'; + called "concatenation" + +'r|s' + either an 'r' or an 's' + +'r/s' + an 'r' but only if it is followed by an 's'. The text matched by + 's' is included when determining whether this rule is the longest + match, but is then returned to the input before the action is + executed. So the action only sees the text matched by 'r'. This + type of pattern is called "trailing context". (There are some + combinations of 'r/s' that flex cannot match correctly. *Note + Limitations::, regarding dangerous trailing context.) + +'^r' + an 'r', but only at the beginning of a line (i.e., when just + starting to scan, or right after a newline has been scanned). + +'r$' + an 'r', but only at the end of a line (i.e., just before a + newline). Equivalent to 'r/\n'. + + Note that 'flex''s notion of "newline" is exactly whatever the C + compiler used to compile 'flex' interprets '\n' as; in particular, + on some DOS systems you must either filter out '\r's in the input + yourself, or explicitly use 'r/\r\n' for 'r$'. + +'<s>r' + an 'r', but only in start condition 's' (see *note Start + Conditions:: for discussion of start conditions). + +'<s1,s2,s3>r' + same, but in any of start conditions 's1', 's2', or 's3'. + +'<*>r' + an 'r' in any start condition, even an exclusive one. + +'<<EOF>>' + an end-of-file. 
+ +'<s1,s2><<EOF>>' + an end-of-file when in start condition 's1' or 's2' + + Note that inside of a character class, all regular expression +operators lose their special meaning except escape ('\') and the +character class operators, '-', ']]', and, at the beginning of the +class, '^'. + + The regular expressions listed above are grouped according to +precedence, from highest precedence at the top to lowest at the bottom. +Those grouped together have equal precedence (see special note on the +precedence of the repeat operator, '{}', under the documentation for the +'--posix' POSIX compliance option). For example, + + foo|bar* + + is the same as + + (foo)|(ba(r*)) + + since the '*' operator has higher precedence than concatenation, and +concatenation higher than alternation ('|'). This pattern therefore +matches _either_ the string 'foo' _or_ the string 'ba' followed by +zero-or-more 'r''s. To match 'foo' or zero-or-more repetitions of the +string 'bar', use: + + foo|(bar)* + + And to match a sequence of zero or more repetitions of 'foo' and +'bar': + + (foo|bar)* + + In addition to characters and ranges of characters, character classes +can also contain "character class expressions". These are expressions +enclosed inside '[:' and ':]' delimiters (which themselves must appear +between the '[' and ']' of the character class. Other elements may +occur inside the character class, too). The valid expressions are: + + [:alnum:] [:alpha:] [:blank:] + [:cntrl:] [:digit:] [:graph:] + [:lower:] [:print:] [:punct:] + [:space:] [:upper:] [:xdigit:] + + These expressions all designate a set of characters equivalent to the +corresponding standard C 'isXXX' function. For example, '[:alnum:]' +designates those characters for which 'isalnum()' returns true - i.e., +any alphabetic or numeric character. Some systems don't provide +'isblank()', so flex defines '[:blank:]' as a blank or a tab. 
+ + For example, the following character classes are all equivalent: + + [[:alnum:]] + [[:alpha:][:digit:]] + [[:alpha:][0-9]] + [a-zA-Z0-9] + + A word of caution. Character classes are expanded immediately when +seen in the 'flex' input. This means the character classes are +sensitive to the locale in which 'flex' is executed, and the resulting +scanner will not be sensitive to the runtime locale. This may or may +not be desirable. + + * If your scanner is case-insensitive (the '-i' flag), then + '[:upper:]' and '[:lower:]' are equivalent to '[:alpha:]'. + + * Character classes with ranges, such as '[a-Z]', should be used with + caution in a case-insensitive scanner if the range spans upper or + lowercase characters. Flex does not know if you want to fold all + upper and lowercase characters together, or if you want the literal + numeric range specified (with no case folding). When in doubt, + flex will assume that you meant the literal numeric range, and will + issue a warning. The exception to this rule is a character range + such as '[a-z]' or '[S-W]' where it is obvious that you want + case-folding to occur. Here are some examples with the '-i' flag + enabled: + + Range Result Literal Range Alternate Range + '[a-t]' ok '[a-tA-T]' + '[A-T]' ok '[a-tA-T]' + '[A-t]' ambiguous '[A-Z\[\\\]_`a-t]' '[a-tA-T]' + '[_-{]' ambiguous '[_`a-z{]' '[_`a-zA-Z{]' + '[@-C]' ambiguous '[@ABC]' '[@A-Z\[\\\]_`abc]' + + * A negated character class such as the example '[^A-Z]' above _will_ + match a newline unless '\n' (or an equivalent escape sequence) is + one of the characters explicitly present in the negated character + class (e.g., '[^A-Z\n]'). This is unlike how many other regular + expression tools treat negated character classes, but unfortunately + the inconsistency is historically entrenched. Matching newlines + means that a pattern like '[^"]*' can match the entire input unless + there's another quote in the input. 
+ + Flex allows negation of character class expressions by prepending + '^' to the POSIX character class name. + + [:^alnum:] [:^alpha:] [:^blank:] + [:^cntrl:] [:^digit:] [:^graph:] + [:^lower:] [:^print:] [:^punct:] + [:^space:] [:^upper:] [:^xdigit:] + + Flex will issue a warning if the expressions '[:^upper:]' and + '[:^lower:]' appear in a case-insensitive scanner, since their + meaning is unclear. The current behavior is to skip them entirely, + but this may change without notice in future revisions of flex. + + * + The '{-}' operator computes the difference of two character + classes. For example, '[a-c]{-}[b-z]' represents all the + characters in the class '[a-c]' that are not in the class '[b-z]' + (which in this case, is just the single character 'a'). The '{-}' + operator is left associative, so '[abc]{-}[b]{-}[c]' is the same as + '[a]'. Be careful not to accidentally create an empty set, which + will never match. + + * + The '{+}' operator computes the union of two character classes. + For example, '[a-z]{+}[0-9]' is the same as '[a-z0-9]'. This + operator is useful when preceded by the result of a difference + operation, as in, '[[:alpha:]]{-}[[:lower:]]{+}[q]', which is + equivalent to '[A-Zq]' in the "C" locale. + + * A rule can have at most one instance of trailing context (the '/' + operator or the '$' operator). The start condition, '^', and + '<<EOF>>' patterns can only occur at the beginning of a pattern, + and, as well as with '/' and '$', cannot be grouped inside + parentheses. A '^' which does not occur at the beginning of a rule + or a '$' which does not occur at the end of a rule loses its + special properties and is treated as a normal character. + + * The following are invalid: + + foo/bar$ + <sc1>foo<sc2>bar + + Note that the first of these can be written 'foo/bar\n'. 
+ + * The following will result in '$' or '^' being treated as a normal + character: + + foo|(bar$) + foo|^bar + + If the desired meaning is a 'foo' or a 'bar'-followed-by-a-newline, + the following could be used (the special '|' action is explained + below, *note Actions::): + + foo | + bar$ /* action goes here */ + + A similar trick will work for matching a 'foo' or a + 'bar'-at-the-beginning-of-a-line. + + +File: flex.info, Node: Matching, Next: Actions, Prev: Patterns, Up: Top + +7 How the Input Is Matched +************************** + +When the generated scanner is run, it analyzes its input looking for +strings which match any of its patterns. If it finds more than one +match, it takes the one matching the most text (for trailing context +rules, this includes the length of the trailing part, even though it +will then be returned to the input). If it finds two or more matches of +the same length, the rule listed first in the 'flex' input file is +chosen. + + Once the match is determined, the text corresponding to the match +(called the "token") is made available in the global character pointer +'yytext', and its length in the global integer 'yyleng'. The "action" +corresponding to the matched pattern is then executed (*note Actions::), +and then the remaining input is scanned for another match. + + If no match is found, then the "default rule" is executed: the next +character in the input is considered matched and copied to the standard +output. Thus, the simplest valid 'flex' input is: + + %% + + which generates a scanner that simply copies its input (one character +at a time) to its output. + + Note that 'yytext' can be defined in two different ways: either as a +character _pointer_ or as a character _array_. You can control which +definition 'flex' uses by including one of the special directives +'%pointer' or '%array' in the first (definitions) section of your flex +input. 
The default is '%pointer', unless you use the '-l' lex +compatibility option, in which case 'yytext' will be an array. The +advantage of using '%pointer' is substantially faster scanning and no +buffer overflow when matching very large tokens (unless you run out of +dynamic memory). The disadvantage is that you are restricted in how +your actions can modify 'yytext' (*note Actions::), and calls to the +'unput()' function destroy the present contents of 'yytext', which can +be a considerable porting headache when moving between different 'lex' +versions. + + The advantage of '%array' is that you can then modify 'yytext' to +your heart's content, and calls to 'unput()' do not destroy 'yytext' +(*note Actions::). Furthermore, existing 'lex' programs sometimes +access 'yytext' externally using declarations of the form: + + extern char yytext[]; + + This definition is erroneous when used with '%pointer', but correct +for '%array'. + + The '%array' declaration defines 'yytext' to be an array of 'YYLMAX' +characters, which defaults to a fairly large value. You can change the +size by simply #define'ing 'YYLMAX' to a different value in the first +section of your 'flex' input. As mentioned above, with '%pointer' +yytext grows dynamically to accommodate large tokens. While this means +your '%pointer' scanner can accommodate very large tokens (such as +matching entire blocks of comments), bear in mind that each time the +scanner must resize 'yytext' it also must rescan the entire token from +the beginning, so matching such tokens can prove slow. 'yytext' +presently does _not_ dynamically grow if a call to 'unput()' results in +too much text being pushed back; instead, a run-time error results. + + Also note that you cannot use '%array' with C++ scanner classes +(*note Cxx::). 
+ + +File: flex.info, Node: Actions, Next: Generated Scanner, Prev: Matching, Up: Top + +8 Actions +********* + +Each pattern in a rule has a corresponding "action", which can be any +arbitrary C statement. The pattern ends at the first non-escaped +whitespace character; the remainder of the line is its action. If the +action is empty, then when the pattern is matched the input token is +simply discarded. For example, here is the specification for a program +which deletes all occurrences of 'zap me' from its input: + + %% + "zap me" + + This example will copy all other characters in the input to the +output since they will be matched by the default rule. + + Here is a program which compresses multiple blanks and tabs down to a +single blank, and throws away whitespace found at the end of a line: + + %% + [ \t]+ putchar( ' ' ); + [ \t]+$ /* ignore this token */ + + If the action contains a '{', then the action spans till the +balancing '}' is found, and the action may cross multiple lines. 'flex' +knows about C strings and comments and won't be fooled by braces found +within them, but also allows actions to begin with '%{' and will +consider the action to be all the text up to the next '%}' (regardless +of ordinary braces inside the action). + + An action consisting solely of a vertical bar ('|') means "same as +the action for the next rule". See below for an illustration. + + Actions can include arbitrary C code, including 'return' statements +to return a value to whatever routine called 'yylex()'. Each time +'yylex()' is called it continues processing tokens from where it last +left off until it either reaches the end of the file or executes a +return. + + Actions are free to modify 'yytext' except for lengthening it (adding +characters to its end-these will overwrite later characters in the input +stream). This however does not apply when using '%array' (*note +Matching::). In that case, 'yytext' may be freely modified in any way. 
+ + Actions are free to modify 'yyleng' except they should not do so if +the action also includes use of 'yymore()' (see below). + + There are a number of special directives which can be included within +an action: + +'ECHO' + copies yytext to the scanner's output. + +'BEGIN' + followed by the name of a start condition places the scanner in the + corresponding start condition (see below). + +'REJECT' + directs the scanner to proceed on to the "second best" rule which + matched the input (or a prefix of the input). The rule is chosen + as described above in *note Matching::, and 'yytext' and 'yyleng' + set up appropriately. It may either be one which matched as much + text as the originally chosen rule but came later in the 'flex' + input file, or one which matched less text. For example, the + following will both count the words in the input and call the + routine 'special()' whenever 'frob' is seen: + + int word_count = 0; + %% + + frob special(); REJECT; + [^ \t\n]+ ++word_count; + + Without the 'REJECT', any occurrences of 'frob' in the input would + not be counted as words, since the scanner normally executes only + one action per token. Multiple uses of 'REJECT' are allowed, each + one finding the next best choice to the currently active rule. For + example, when the following scanner scans the token 'abcd', it will + write 'abcdabcaba' to the output: + + %% + a | + ab | + abc | + abcd ECHO; REJECT; + .|\n /* eat up any unmatched character */ + + The first three rules share the fourth's action since they use the + special '|' action. + + 'REJECT' is a particularly expensive feature in terms of scanner + performance; if it is used in _any_ of the scanner's actions it + will slow down _all_ of the scanner's matching. Furthermore, + 'REJECT' cannot be used with the '-Cf' or '-CF' options (*note + Scanner Options::). + + Note also that unlike the other special actions, 'REJECT' is a + _branch_. Code immediately following it in the action will _not_ + be executed. 
+ +'yymore()' + tells the scanner that the next time it matches a rule, the + corresponding token should be _appended_ onto the current value of + 'yytext' rather than replacing it. For example, given the input + 'mega-kludge' the following will write 'mega-mega-kludge' to the + output: + + %% + mega- ECHO; yymore(); + kludge ECHO; + + First 'mega-' is matched and echoed to the output. Then 'kludge' + is matched, but the previous 'mega-' is still hanging around at the + beginning of 'yytext' so the 'ECHO' for the 'kludge' rule will + actually write 'mega-kludge'. + + Two notes regarding use of 'yymore()'. First, 'yymore()' depends on +the value of 'yyleng' correctly reflecting the size of the current +token, so you must not modify 'yyleng' if you are using 'yymore()'. +Second, the presence of 'yymore()' in the scanner's action entails a +minor performance penalty in the scanner's matching speed. + + 'yyless(n)' returns all but the first 'n' characters of the current +token back to the input stream, where they will be rescanned when the +scanner looks for the next match. 'yytext' and 'yyleng' are adjusted +appropriately (e.g., 'yyleng' will now be equal to 'n'). For example, +on the input 'foobar' the following will write out 'foobarbar': + + %% + foobar ECHO; yyless(3); + [a-z]+ ECHO; + + An argument of 0 to 'yyless()' will cause the entire current input +string to be scanned again. Unless you've changed how the scanner will +subsequently process its input (using 'BEGIN', for example), this will +result in an endless loop. + + Note that 'yyless()' is a macro and can only be used in the flex +input file, not from other source files. + + 'unput(c)' puts the character 'c' back onto the input stream. It +will be the next character scanned. The following action will take the +current token and cause it to be rescanned enclosed in parentheses. 
+ + { + int i; + /* Copy yytext because unput() trashes yytext */ + char *yycopy = strdup( yytext ); + unput( ')' ); + for ( i = yyleng - 1; i >= 0; --i ) + unput( yycopy[i] ); + unput( '(' ); + free( yycopy ); + } + + Note that since each 'unput()' puts the given character back at the +_beginning_ of the input stream, pushing back strings must be done +back-to-front. + + An important potential problem when using 'unput()' is that if you +are using '%pointer' (the default), a call to 'unput()' _destroys_ the +contents of 'yytext', starting with its rightmost character and +devouring one character to the left with each call. If you need the +value of 'yytext' preserved after a call to 'unput()' (as in the above +example), you must either first copy it elsewhere, or build your scanner +using '%array' instead (*note Matching::). + + Finally, note that you cannot put back 'EOF' to attempt to mark the +input stream with an end-of-file. + + 'input()' reads the next character from the input stream. For +example, the following is one way to eat up C comments: + + %% + "/*" { + int c; + + for ( ; ; ) + { + while ( (c = input()) != '*' && + c != EOF ) + ; /* eat up text of comment */ + + if ( c == '*' ) + { + while ( (c = input()) == '*' ) + ; + if ( c == '/' ) + break; /* found the end */ + } + + if ( c == EOF ) + { + error( "EOF in comment" ); + break; + } + } + } + + (Note that if the scanner is compiled using 'C++', then 'input()' is +instead referred to as yyinput(), in order to avoid a name clash with +the 'C++' stream by the name of 'input'.) + + 'YY_FLUSH_BUFFER;' flushes the scanner's internal buffer so that the +next time the scanner attempts to match a token, it will first refill +the buffer using 'YY_INPUT()' (*note Generated Scanner::). This action +is a special case of the more general 'yy_flush_buffer()' function, +described below (*note Multiple Input Buffers::). + + 'yyterminate()' can be used in lieu of a return statement in an +action. 
It terminates the scanner and returns a 0 to the scanner's +caller, indicating "all done". By default, 'yyterminate()' is also +called when an end-of-file is encountered. It is a macro and may be +redefined. + + +File: flex.info, Node: Generated Scanner, Next: Start Conditions, Prev: Actions, Up: Top + +9 The Generated Scanner +*********************** + +The output of 'flex' is the file 'lex.yy.c', which contains the scanning +routine 'yylex()', a number of tables used by it for matching tokens, +and a number of auxiliary routines and macros. By default, 'yylex()' is +declared as follows: + + int yylex() + { + ... various definitions and the actions in here ... + } + + (If your environment supports function prototypes, then it will be +'int yylex( void )'.) This definition may be changed by defining the +'YY_DECL' macro. For example, you could use: + + #define YY_DECL float lexscan( a, b ) float a, b; + + to give the scanning routine the name 'lexscan', returning a float, +and taking two floats as arguments. Note that if you give arguments to +the scanning routine using a K&R-style/non-prototyped function +declaration, you must terminate the definition with a semi-colon (;). + + 'flex' generates 'C99' function definitions by default. Flex used to +have the ability to generate obsolete, er, 'traditional', function +definitions. This was to support bootstrapping gcc on old systems. +Unfortunately, traditional definitions prevent us from using any +standard data types smaller than int (such as short, char, or bool) as +function arguments. Furthermore, supporting traditional definitions added +extra complexity in the skeleton file. For this reason, current +versions of 'flex' generate standard C99 code only, leaving K&R-style +functions to the historians. + + Whenever 'yylex()' is called, it scans tokens from the global input +file 'yyin' (which defaults to stdin). 
It continues until it either +reaches an end-of-file (at which point it returns the value 0) or one of +its actions executes a 'return' statement. + + If the scanner reaches an end-of-file, subsequent calls are undefined +unless either 'yyin' is pointed at a new input file (in which case +scanning continues from that file), or 'yyrestart()' is called. +'yyrestart()' takes one argument, a 'FILE *' pointer (which can be NULL, +if you've set up 'YY_INPUT' to scan from a source other than 'yyin'), +and initializes 'yyin' for scanning from that file. Essentially there +is no difference between just assigning 'yyin' to a new input file or +using 'yyrestart()' to do so; the latter is available for compatibility +with previous versions of 'flex', and because it can be used to switch +input files in the middle of scanning. It can also be used to throw +away the current input buffer, by calling it with an argument of 'yyin'; +but it would be better to use 'YY_FLUSH_BUFFER' (*note Actions::). Note +that 'yyrestart()' does _not_ reset the start condition to 'INITIAL' +(*note Start Conditions::). + + If 'yylex()' stops scanning due to executing a 'return' statement in +one of the actions, the scanner may then be called again and it will +resume scanning where it left off. + + By default (and for purposes of efficiency), the scanner uses +block-reads rather than simple 'getc()' calls to read characters from +'yyin'. The nature of how it gets its input can be controlled by +defining the 'YY_INPUT' macro. The calling sequence for 'YY_INPUT()' is +'YY_INPUT(buf,result,max_size)'. Its action is to place up to +'max_size' characters in the character array 'buf' and return in the +integer variable 'result' either the number of characters read or the +constant 'YY_NULL' (0 on Unix systems) to indicate 'EOF'. The default +'YY_INPUT' reads from the global file-pointer 'yyin'. 
+ + Here is a sample definition of 'YY_INPUT' (in the definitions section +of the input file): + + %{ + #define YY_INPUT(buf,result,max_size) \ + { \ + int c = getchar(); \ + result = (c == EOF) ? YY_NULL : (buf[0] = c, 1); \ + } + %} + + This definition will change the input processing to occur one +character at a time. + + When the scanner receives an end-of-file indication from YY_INPUT, it +then checks the 'yywrap()' function. If 'yywrap()' returns false +(zero), then it is assumed that the function has gone ahead and set up +'yyin' to point to another input file, and scanning continues. If it +returns true (non-zero), then the scanner terminates, returning 0 to its +caller. Note that in either case, the start condition remains +unchanged; it does _not_ revert to 'INITIAL'. + + If you do not supply your own version of 'yywrap()', then you must +either use '%option noyywrap' (in which case the scanner behaves as +though 'yywrap()' returned 1), or you must link with '-lfl' to obtain +the default version of the routine, which always returns 1. + + For scanning from in-memory buffers (e.g., scanning strings), see +*note Scanning Strings::. *Note Multiple Input Buffers::. + + The scanner writes its 'ECHO' output to the 'yyout' global (default, +'stdout'), which may be redefined by the user simply by assigning it to +some other 'FILE' pointer. + + +File: flex.info, Node: Start Conditions, Next: Multiple Input Buffers, Prev: Generated Scanner, Up: Top + +10 Start Conditions +******************* + +'flex' provides a mechanism for conditionally activating rules. Any +rule whose pattern is prefixed with '<sc>' will only be active when the +scanner is in the "start condition" named 'sc'. For example, + + <STRING>[^"]* { /* eat up the string body ... */ + ... + } + + will be active only when the scanner is in the 'STRING' start +condition, and + + <INITIAL,STRING,QUOTE>\. { /* handle an escape ... */ + ... 
+ } + + will be active only when the current start condition is either +'INITIAL', 'STRING', or 'QUOTE'. + + Start conditions are declared in the definitions (first) section of +the input using unindented lines beginning with either '%s' or '%x' +followed by a list of names. The former declares "inclusive" start +conditions, the latter "exclusive" start conditions. A start condition +is activated using the 'BEGIN' action. Until the next 'BEGIN' action is +executed, rules with the given start condition will be active and rules +with other start conditions will be inactive. If the start condition is +inclusive, then rules with no start conditions at all will also be +active. If it is exclusive, then _only_ rules qualified with the start +condition will be active. A set of rules contingent on the same +exclusive start condition describe a scanner which is independent of any +of the other rules in the 'flex' input. Because of this, exclusive +start conditions make it easy to specify "mini-scanners" which scan +portions of the input that are syntactically different from the rest +(e.g., comments). + + If the distinction between inclusive and exclusive start conditions +is still a little vague, here's a simple example illustrating the +connection between the two. The set of rules: + + %s example + %% + + <example>foo do_something(); + + bar something_else(); + + is equivalent to + + %x example + %% + + <example>foo do_something(); + + <INITIAL,example>bar something_else(); + + Without the '<INITIAL,example>' qualifier, the 'bar' pattern in the +second example wouldn't be active (i.e., couldn't match) when in start +condition 'example'. If we just used '<example>' to qualify 'bar', +though, then it would only be active in 'example' and not in 'INITIAL', +while in the first example it's active in both, because in the first +example the 'example' start condition is an inclusive '(%s)' start +condition. 
+ + Also note that the special start-condition specifier '<*>' matches +every start condition. Thus, the above example could also have been +written: + + %x example + %% + + <example>foo do_something(); + + <*>bar something_else(); + + The default rule (to 'ECHO' any unmatched character) remains active +in start conditions. It is equivalent to: + + <*>.|\n ECHO; + + 'BEGIN(0)' returns to the original state where only the rules with no +start conditions are active. This state can also be referred to as the +start-condition 'INITIAL', so 'BEGIN(INITIAL)' is equivalent to +'BEGIN(0)'. (The parentheses around the start condition name are not +required but are considered good style.) + + 'BEGIN' actions can also be given as indented code at the beginning +of the rules section. For example, the following will cause the scanner +to enter the 'SPECIAL' start condition whenever 'yylex()' is called and +the global variable 'enter_special' is true: + + int enter_special; + + %x SPECIAL + %% + if ( enter_special ) + BEGIN(SPECIAL); + + <SPECIAL>blahblahblah + ...more rules follow... + + To illustrate the uses of start conditions, here is a scanner which +provides two different interpretations of a string like '123.456'. By +default it will treat it as three tokens, the integer '123', a dot +('.'), and the integer '456'. But if the string is preceded earlier in +the line by the string 'expect-floats' it will treat it as a single +token, the floating-point number '123.456': + + %{ + #include <math.h> + %} + %s expect + + %% + expect-floats BEGIN(expect); + + <expect>[0-9]+.[0-9]+ { + printf( "found a float, = %f\n", + atof( yytext ) ); + } + <expect>\n { + /* that's the end of the line, so + * we need another "expect-number" + * before we'll recognize any more + * numbers + */ + BEGIN(INITIAL); + } + + [0-9]+ { + printf( "found an integer, = %d\n", + atoi( yytext ) ); + } + + "." 
printf( "found a dot\n" ); + + Here is a scanner which recognizes (and discards) C comments while +maintaining a count of the current input line. + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */ + <comment>\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + + This scanner goes to a bit of trouble to match as much text as +possible with each rule. In general, when attempting to write a +high-speed scanner try to match as much as possible in each rule, as it's a +big win. + + Note that start condition names are really integer values and can be +stored as such. Thus, the above could be extended in the following +fashion: + + %x comment foo + %% + int line_num = 1; + int comment_caller; + + "/*" { + comment_caller = INITIAL; + BEGIN(comment); + } + + ... + + <foo>"/*" { + comment_caller = foo; + BEGIN(comment); + } + + <comment>[^*\n]* /* eat anything that's not a '*' */ + <comment>"*"+[^*/\n]* /* eat up '*'s not followed by '/'s */ + <comment>\n ++line_num; + <comment>"*"+"/" BEGIN(comment_caller); + + Furthermore, you can access the current start condition using the +integer-valued 'YY_START' macro. For example, the above assignments to +'comment_caller' could instead be written + + comment_caller = YY_START; + + Flex provides 'YYSTATE' as an alias for 'YY_START' (since that is +what's used by AT&T 'lex'). + + For historical reasons, start conditions do not have their own +name-space within the generated scanner. The start condition names are +unmodified in the generated scanner and generated header. *Note +option-header::. *Note option-prefix::. 
+ + Finally, here's an example of how to match C-style quoted strings +using exclusive start conditions, including expanded escape sequences +(but not including checking for a string that's too long): + + %x str + + %% + char string_buf[MAX_STR_CONST]; + char *string_buf_ptr; + + + \" string_buf_ptr = string_buf; BEGIN(str); + + <str>\" { /* saw closing quote - all done */ + BEGIN(INITIAL); + *string_buf_ptr = '\0'; + /* return string constant token type and + * value to parser + */ + } + + <str>\n { + /* error - unterminated string constant */ + /* generate error message */ + } + + <str>\\[0-7]{1,3} { + /* octal escape sequence */ + int result; + + (void) sscanf( yytext + 1, "%o", &result ); + + if ( result > 0xff ) + /* error, constant is out-of-bounds */ + + *string_buf_ptr++ = result; + } + + <str>\\[0-9]+ { + /* generate error - bad escape sequence; something + * like '\48' or '\0777777' + */ + } + + <str>\\n *string_buf_ptr++ = '\n'; + <str>\\t *string_buf_ptr++ = '\t'; + <str>\\r *string_buf_ptr++ = '\r'; + <str>\\b *string_buf_ptr++ = '\b'; + <str>\\f *string_buf_ptr++ = '\f'; + + <str>\\(.|\n) *string_buf_ptr++ = yytext[1]; + + <str>[^\\\n\"]+ { + char *yptr = yytext; + + while ( *yptr ) + *string_buf_ptr++ = *yptr++; + } + + Often, such as in some of the examples above, you wind up writing a +whole bunch of rules all preceded by the same start condition(s). Flex +makes this a little easier and cleaner by introducing a notion of start +condition "scope". A start condition scope is begun with: + + <SCs>{ + + where '<SCs>' is a list of one or more start conditions. Inside the +start condition scope, every rule automatically has the prefix '<SCs>' +applied to it, until a '}' which matches the initial '{'. 
So, for +example, + + <ESC>{ + "\\n" return '\n'; + "\\r" return '\r'; + "\\f" return '\f'; + "\\0" return '\0'; + } + + is equivalent to: + + <ESC>"\\n" return '\n'; + <ESC>"\\r" return '\r'; + <ESC>"\\f" return '\f'; + <ESC>"\\0" return '\0'; + + Start condition scopes may be nested. + + The following routines are available for manipulating stacks of start +conditions: + + -- Function: void yy_push_state ( int 'new_state' ) + pushes the current start condition onto the top of the start + condition stack and switches to 'new_state' as though you had used + 'BEGIN new_state' (recall that start condition names are also + integers). + + -- Function: void yy_pop_state () + pops the top of the stack and switches to it via 'BEGIN'. + + -- Function: int yy_top_state () + returns the top of the stack without altering the stack's contents. + + The start condition stack grows dynamically and so has no built-in +size limitation. If memory is exhausted, program execution aborts. + + To use start condition stacks, your scanner must include a '%option +stack' directive (*note Scanner Options::). + + +File: flex.info, Node: Multiple Input Buffers, Next: EOF, Prev: Start Conditions, Up: Top + +11 Multiple Input Buffers +************************* + +Some scanners (such as those which support "include" files) require +reading from several input streams. As 'flex' scanners do a large +amount of buffering, one cannot control where the next input will be +read from by simply writing a 'YY_INPUT()' which is sensitive to the +scanning context. 'YY_INPUT()' is only called when the scanner reaches +the end of its buffer, which may be a long time after scanning a +statement such as an 'include' statement which requires switching the +input source. + + To negotiate these sorts of problems, 'flex' provides a mechanism for +creating and switching between multiple input buffers. 
An input buffer +is created by using: + + -- Function: YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size ) + + which takes a 'FILE' pointer and a size and creates a buffer +associated with the given file and large enough to hold 'size' +characters (when in doubt, use 'YY_BUF_SIZE' for the size). It returns +a 'YY_BUFFER_STATE' handle, which may then be passed to other routines +(see below). The 'YY_BUFFER_STATE' type is a pointer to an opaque +'struct yy_buffer_state' structure, so you may safely initialize +'YY_BUFFER_STATE' variables to '((YY_BUFFER_STATE) 0)' if you wish, and +also refer to the opaque structure in order to correctly declare input +buffers in source files other than that of your scanner. Note that the +'FILE' pointer in the call to 'yy_create_buffer' is only used as the +value of 'yyin' seen by 'YY_INPUT'. If you redefine 'YY_INPUT()' so it +no longer uses 'yyin', then you can safely pass a NULL 'FILE' pointer to +'yy_create_buffer'. You select a particular buffer to scan from using: + + -- Function: void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer ) + + The above function switches the scanner's input buffer so subsequent +tokens will come from 'new_buffer'. Note that 'yy_switch_to_buffer()' +may be used by 'yywrap()' to set things up for continued scanning, +instead of opening a new file and pointing 'yyin' at it. If you are +looking for a stack of input buffers, then you want to use +'yypush_buffer_state()' instead of this function. Note also that +switching input sources via either 'yy_switch_to_buffer()' or 'yywrap()' +does _not_ change the start condition. + + -- Function: void yy_delete_buffer ( YY_BUFFER_STATE buffer ) + + is used to reclaim the storage associated with a buffer. ('buffer' +can be NULL, in which case the routine does nothing.) 
You can also +clear the current contents of a buffer using 'yy_flush_buffer()' (see below). + + -- Function: void yypush_buffer_state ( YY_BUFFER_STATE buffer ) + + This function pushes the new buffer state onto an internal stack. +The pushed state becomes the new current state. The stack is maintained +by flex and will grow as required. This function is intended to be used +instead of 'yy_switch_to_buffer', when you want to change states, but +preserve the current state for later use. + + -- Function: void yypop_buffer_state ( ) + + This function removes the current state from the top of the stack, +and deletes it by calling 'yy_delete_buffer'. The next state on the +stack, if any, becomes the new current state. + + -- Function: void yy_flush_buffer ( YY_BUFFER_STATE buffer ) + + This function discards the buffer's contents, so the next time the +scanner attempts to match a token from the buffer, it will first fill +the buffer anew using 'YY_INPUT()'. + + -- Function: YY_BUFFER_STATE yy_new_buffer ( FILE *file, int size ) + + is an alias for 'yy_create_buffer()', provided for compatibility with +the C++ use of 'new' and 'delete' for creating and destroying dynamic +objects. + + The 'YY_CURRENT_BUFFER' macro returns a 'YY_BUFFER_STATE' handle to the +current buffer. It should not be used as an lvalue. + + Here are two examples of using these features for writing a scanner +which expands include files (the '<<EOF>>' feature is discussed below). + + This first example uses yypush_buffer_state and yypop_buffer_state. +Flex maintains the stack internally. + + /* the "incl" state is used for picking up the name + * of an include file + */ + %x incl + %% + include BEGIN(incl); + + [a-z]+ ECHO; + [^a-z\n]*\n? ECHO; + + <incl>[ \t]* /* eat the whitespace */ + <incl>[^ \t\n]+ { /* got the include file name */ + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) + error( ... 
); + + yypush_buffer_state(yy_create_buffer( yyin, YY_BUF_SIZE )); + + BEGIN(INITIAL); + } + + <<EOF>> { + yypop_buffer_state(); + + if ( !YY_CURRENT_BUFFER ) + { + yyterminate(); + } + } + + The second example, below, does the same thing as the previous +example did, but manages its own input buffer stack manually (instead of +letting flex do it). + + /* the "incl" state is used for picking up the name + * of an include file + */ + %x incl + + %{ + #define MAX_INCLUDE_DEPTH 10 + YY_BUFFER_STATE include_stack[MAX_INCLUDE_DEPTH]; + int include_stack_ptr = 0; + %} + + %% + include BEGIN(incl); + + [a-z]+ ECHO; + [^a-z\n]*\n? ECHO; + + <incl>[ \t]* /* eat the whitespace */ + <incl>[^ \t\n]+ { /* got the include file name */ + if ( include_stack_ptr >= MAX_INCLUDE_DEPTH ) + { + fprintf( stderr, "Includes nested too deeply" ); + exit( 1 ); + } + + include_stack[include_stack_ptr++] = + YY_CURRENT_BUFFER; + + yyin = fopen( yytext, "r" ); + + if ( ! yyin ) + error( ... ); + + yy_switch_to_buffer( + yy_create_buffer( yyin, YY_BUF_SIZE ) ); + + BEGIN(INITIAL); + } + + <<EOF>> { + if ( --include_stack_ptr == 0 ) + { + yyterminate(); + } + + else + { + yy_delete_buffer( YY_CURRENT_BUFFER ); + yy_switch_to_buffer( + include_stack[include_stack_ptr] ); + } + } + + The following routines are available for setting up input buffers for +scanning in-memory strings instead of files. All of them create a new +input buffer for scanning the string, and return a corresponding +'YY_BUFFER_STATE' handle (which you should delete with +'yy_delete_buffer()' when done with it). They also switch to the new +buffer using 'yy_switch_to_buffer()', so the next call to 'yylex()' will +start scanning the string. + + -- Function: YY_BUFFER_STATE yy_scan_string ( const char *str ) + scans a NUL-terminated string. + + -- Function: YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len + ) + scans 'len' bytes (including possibly 'NUL's) starting at location + 'bytes'. 
+ + Note that both of these functions create and scan a _copy_ of the +string or bytes. (This may be desirable, since 'yylex()' modifies the +contents of the buffer it is scanning.) You can avoid the copy by +using: + + -- Function: YY_BUFFER_STATE yy_scan_buffer (char *base, yy_size_t + size) + which scans in place the buffer starting at 'base', consisting of + 'size' bytes, the last two bytes of which _must_ be + 'YY_END_OF_BUFFER_CHAR' (ASCII NUL). These last two bytes are not + scanned; thus, scanning consists of 'base[0]' through + 'base[size-2]', inclusive. + + If you fail to set up 'base' in this manner (i.e., forget the final +two 'YY_END_OF_BUFFER_CHAR' bytes), then 'yy_scan_buffer()' returns a +NULL pointer instead of creating a new input buffer. + + -- Data type: yy_size_t + is an integral type to which you can cast an integer expression + reflecting the size of the buffer. + + +File: flex.info, Node: EOF, Next: Misc Macros, Prev: Multiple Input Buffers, Up: Top + +12 End-of-File Rules +******************** + +The special rule '<<EOF>>' indicates actions which are to be taken when +an end-of-file is encountered and 'yywrap()' returns non-zero (i.e., +indicates no further files to process). The action must finish by doing +one of the following things: + + * assigning 'yyin' to a new input file (in previous versions of + 'flex', after doing the assignment you had to call the special + action 'YY_NEW_FILE'. This is no longer necessary.) + + * executing a 'return' statement; + + * executing the special 'yyterminate()' action. + + * or, switching to a new buffer using 'yy_switch_to_buffer()' as + shown in the example above. + + <<EOF>> rules may not be used with other patterns; they may only be +qualified with a list of start conditions. If an unqualified <<EOF>> +rule is given, it applies to _all_ start conditions which do not already +have <<EOF>> actions. 
To specify an <<EOF>> rule for only the initial +start condition, use: + + <INITIAL><<EOF>> + + These rules are useful for catching things like unclosed comments. +An example: + + %x quote + %% + + ...other rules for dealing with quotes... + + <quote><<EOF>> { + error( "unterminated quote" ); + yyterminate(); + } + <<EOF>> { + if ( *++filelist ) + yyin = fopen( *filelist, "r" ); + else + yyterminate(); + } + + +File: flex.info, Node: Misc Macros, Next: User Values, Prev: EOF, Up: Top + +13 Miscellaneous Macros +*********************** + +The macro 'YY_USER_ACTION' can be defined to provide an action which is +always executed prior to the matched rule's action. For example, it +could be #define'd to call a routine to convert yytext to lower-case. +When 'YY_USER_ACTION' is invoked, the variable 'yy_act' gives the number +of the matched rule (rules are numbered starting with 1). Suppose you +want to profile how often each of your rules is matched. The following +would do the trick: + + #define YY_USER_ACTION ++ctr[yy_act] + + where 'ctr' is an array to hold the counts for the different rules. +Note that the macro 'YY_NUM_RULES' gives the total number of rules +(including the default rule, even if you use '-s'), so a correct +declaration for 'ctr' is: + + int ctr[YY_NUM_RULES]; + + The macro 'YY_USER_INIT' may be defined to provide an action which is +always executed before the first scan (and before the scanner's internal +initializations are done). For example, it could be used to call a +routine to read in a data table or open a logging file. + + The macro 'yy_set_interactive(is_interactive)' can be used to control +whether the current buffer is considered "interactive". An interactive +buffer is processed more slowly, but must be used when the scanner's +input source is indeed interactive to avoid problems due to waiting to +fill buffers (see the discussion of the '-I' flag in *note Scanner +Options::). 
A non-zero value in the macro invocation marks the buffer +as interactive, a zero value as non-interactive. Note that use of this +macro overrides '%option always-interactive' or '%option +never-interactive' (*note Scanner Options::). 'yy_set_interactive()' +must be invoked prior to beginning to scan the buffer that is (or is +not) to be considered interactive. + + The macro 'yy_set_bol(at_bol)' can be used to control whether the +current buffer's scanning context for the next token match is done as +though at the beginning of a line. A non-zero macro argument makes +rules anchored with '^' active, while a zero argument makes '^' rules +inactive. + + The macro 'YY_AT_BOL()' returns true if the next token scanned from +the current buffer will have '^' rules active, false otherwise. + + In the generated scanner, the actions are all gathered in one large +switch statement and separated using 'YY_BREAK', which may be redefined. +By default, it is simply a 'break', to separate each rule's action from +the following rule's. Redefining 'YY_BREAK' allows, for example, C++ +users to #define YY_BREAK to do nothing (while being very careful that +every rule ends with a 'break' or a 'return'!) to avoid suffering from +unreachable statement warnings where because a rule's action ends with +'return', the 'YY_BREAK' is inaccessible. + + +File: flex.info, Node: User Values, Next: Yacc, Prev: Misc Macros, Up: Top + +14 Values Available To the User +******************************* + +This chapter summarizes the various values available to the user in the +rule actions. + +'char *yytext' + holds the text of the current token. It may be modified but not + lengthened (you cannot append characters to the end). 
+ + If the special directive '%array' appears in the first section of + the scanner description, then 'yytext' is instead declared 'char + yytext[YYLMAX]', where 'YYLMAX' is a macro definition that you can + redefine in the first section if you don't like the default value + (generally 8KB). Using '%array' results in somewhat slower + scanners, but the value of 'yytext' becomes immune to calls to + 'unput()', which potentially destroy its value when 'yytext' is a + character pointer. The opposite of '%array' is '%pointer', which + is the default. + + You cannot use '%array' when generating C++ scanner classes (the + '-+' flag). + +'int yyleng' + holds the length of the current token. + +'FILE *yyin' + is the file which by default 'flex' reads from. It may be + redefined but doing so only makes sense before scanning begins or + after an EOF has been encountered. Changing it in the midst of + scanning will have unexpected results since 'flex' buffers its + input; use 'yyrestart()' instead. Once scanning terminates because + an end-of-file has been seen, you can assign 'yyin' at the new + input file and then call the scanner again to continue scanning. + +'void yyrestart( FILE *new_file )' + may be called to point 'yyin' at the new input file. The + switch-over to the new file is immediate (any previously + buffered-up input is lost). Note that calling 'yyrestart()' with + 'yyin' as an argument thus throws away the current input buffer and + continues scanning the same input file. + +'FILE *yyout' + is the file to which 'ECHO' actions are done. It can be reassigned + by the user. + +'YY_CURRENT_BUFFER' + returns a 'YY_BUFFER_STATE' handle to the current buffer. + +'YY_START' + returns an integer value corresponding to the current start + condition. You can subsequently use this value with 'BEGIN' to + return to that start condition. 
+ + +File: flex.info, Node: Yacc, Next: Scanner Options, Prev: User Values, Up: Top + +15 Interfacing with Yacc +************************ + +One of the main uses of 'flex' is as a companion to the 'yacc' +parser-generator. 'yacc' parsers expect to call a routine named +'yylex()' to find the next input token. The routine is supposed to +return the type of the next token as well as putting any associated +value in the global 'yylval'. To use 'flex' with 'yacc', one specifies +the '-d' option to 'yacc' to instruct it to generate the file 'y.tab.h' +containing definitions of all the '%tokens' appearing in the 'yacc' +input. This file is then included in the 'flex' scanner. For example, +if one of the tokens is 'TOK_NUMBER', part of the scanner might look +like: + + %{ + #include "y.tab.h" + %} + + %% + + [0-9]+ yylval = atoi( yytext ); return TOK_NUMBER; + + +File: flex.info, Node: Scanner Options, Next: Performance, Prev: Yacc, Up: Top + +16 Scanner Options +****************** + +The various 'flex' options are categorized by function in the following +menu. If you want to lookup a particular option by name, *Note Index of +Scanner Options::. + +* Menu: + +* Options for Specifying Filenames:: +* Options Affecting Scanner Behavior:: +* Code-Level And API Options:: +* Options for Scanner Speed and Size:: +* Debugging Options:: +* Miscellaneous Options:: + + Even though there are many scanner options, a typical scanner might +only specify the following options: + + %option 8bit reentrant bison-bridge + %option warn nodefault + %option yylineno + %option outfile="scanner.c" header-file="scanner.h" + + The first line specifies the general type of scanner we want. The +second line specifies that we are being careful. The third line asks +flex to track line numbers. The last line tells flex what to name the +files. (The options can be specified in any order. We just divided +them.) 
+ + 'flex' also provides a mechanism for controlling options within the +scanner specification itself, rather than from the flex command-line. +This is done by including '%option' directives in the first section of +the scanner specification. You can specify multiple options with a +single '%option' directive, and multiple directives in the first section +of your flex input file. + + Most options are given simply as names, optionally preceded by the +word 'no' (with no intervening whitespace) to negate their meaning. The +names are the same as their long-option equivalents (but without the +leading '--'). + + 'flex' scans your rule actions to determine whether you use the +'REJECT' or 'yymore()' features. The 'REJECT' and 'yymore' options are +available to override its decision as to whether you use the features, +either by setting them (e.g., '%option reject') to indicate the feature +is indeed used, or unsetting them to indicate it actually is not used +(e.g., '%option noyymore'). + + A number of options are available for lint purists who want to +suppress the appearance of unneeded routines in the generated scanner. +Each of the following, if unset (e.g., '%option nounput'), results in +the corresponding routine not appearing in the generated scanner: + + input, unput + yy_push_state, yy_pop_state, yy_top_state + yy_scan_buffer, yy_scan_bytes, yy_scan_string + + yyget_extra, yyset_extra, yyget_leng, yyget_text, + yyget_lineno, yyset_lineno, yyget_in, yyset_in, + yyget_out, yyset_out, yyget_lval, yyset_lval, + yyget_lloc, yyset_lloc, yyget_debug, yyset_debug + + (though 'yy_push_state()' and friends won't appear anyway unless you +use '%option stack'). 
+ + +File: flex.info, Node: Options for Specifying Filenames, Next: Options Affecting Scanner Behavior, Prev: Scanner Options, Up: Scanner Options + +16.1 Options for Specifying Filenames +===================================== + +'--header-file=FILE, '%option header-file="FILE"'' + instructs flex to write a C header to 'FILE'. This file contains + function prototypes, extern variables, and types used by the + scanner. Only the external API is exported by the header file. + Many macros that are usable from within scanner actions are not + exported to the header file. This is due to namespace problems and + the goal of a clean external API. + + While in the header, the macro 'yyIN_HEADER' is defined, where 'yy' + is substituted with the appropriate prefix. + + The '--header-file' option is not compatible with the '--c++' + option, since the C++ scanner provides its own header in + 'yyFlexLexer.h'. + +'-oFILE, --outfile=FILE, '%option outfile="FILE"'' + directs flex to write the scanner to the file 'FILE' instead of + 'lex.yy.c'. If you combine '--outfile' with the '--stdout' option, + then the scanner is written to 'stdout' but its '#line' directives + (see the '-L' option below) refer to the file 'FILE'. + +'-t, --stdout, '%option stdout'' + instructs 'flex' to write the scanner it generates to standard + output instead of 'lex.yy.c'. + +'-SFILE, --skel=FILE' + overrides the default skeleton file from which 'flex' constructs + its scanners. You'll never need this option unless you are doing + 'flex' maintenance or development. + +'--tables-file=FILE' + Write serialized scanner dfa tables to FILE. The generated scanner + will not contain the tables, and requires them to be loaded at + runtime. *Note serialization::. + +'--tables-verify' + This option is for flex development. We document it here in case + you stumble upon it by accident or in case you suspect some + inconsistency in the serialized tables. 
Flex will serialize the + scanner dfa tables but will also generate the in-code tables as it + normally does. At runtime, the scanner will verify that the + serialized tables match the in-code tables, instead of loading + them. + + +File: flex.info, Node: Options Affecting Scanner Behavior, Next: Code-Level And API Options, Prev: Options for Specifying Filenames, Up: Scanner Options + +16.2 Options Affecting Scanner Behavior +======================================= + +'-i, --case-insensitive, '%option case-insensitive'' + instructs 'flex' to generate a "case-insensitive" scanner. The + case of letters given in the 'flex' input patterns will be ignored, + and tokens in the input will be matched regardless of case. The + matched text given in 'yytext' will have the preserved case (i.e., + it will not be folded). For tricky behavior, see *note case and + character ranges::. + +'-l, --lex-compat, '%option lex-compat'' + turns on maximum compatibility with the original AT&T 'lex' + implementation. Note that this does not mean _full_ compatibility. + Use of this option costs a considerable amount of performance, and + it cannot be used with the '--c++', '--full', '--fast', '-Cf', or + '-CF' options. For details on the compatibilities it provides, see + *note Lex and Posix::. This option also results in the name + 'YY_FLEX_LEX_COMPAT' being '#define''d in the generated scanner. + +'-B, --batch, '%option batch'' + instructs 'flex' to generate a "batch" scanner, the opposite of + _interactive_ scanners generated by '--interactive' (see below). + In general, you use '-B' when you are _certain_ that your scanner + will never be used interactively, and you want to squeeze a + _little_ more performance out of it. If your goal is instead to + squeeze out a _lot_ more performance, you should be using the '-Cf' + or '-CF' options, which turn on '--batch' automatically anyway. + +'-I, --interactive, '%option interactive'' + instructs 'flex' to generate an interactive scanner. 
An + interactive scanner is one that only looks ahead to decide what + token has been matched if it absolutely must. It turns out that + always looking one extra character ahead, even if the scanner has + already seen enough text to disambiguate the current token, is a + bit faster than only looking ahead when necessary. But scanners + that always look ahead give dreadful interactive performance; for + example, when a user types a newline, it is not recognized as a + newline token until they enter _another_ token, which often means + typing in another whole line. + + 'flex' scanners default to 'interactive' unless you use the '-Cf' + or '-CF' table-compression options (*note Performance::). That's + because if you're looking for high-performance you should be using + one of these options, so if you didn't, 'flex' assumes you'd rather + trade off a bit of run-time performance for intuitive interactive + behavior. Note also that you _cannot_ use '--interactive' in + conjunction with '-Cf' or '-CF'. Thus, this option is not really + needed; it is on by default for all those cases in which it is + allowed. + + You can force a scanner to _not_ be interactive by using '--batch' + +'-7, --7bit, '%option 7bit'' + instructs 'flex' to generate a 7-bit scanner, i.e., one which can + only recognize 7-bit characters in its input. The advantage of + using '--7bit' is that the scanner's tables can be up to half the + size of those generated using the '--8bit'. The disadvantage is + that such scanners often hang or crash if their input contains an + 8-bit character. + + Note, however, that unless you generate your scanner using the + '-Cf' or '-CF' table compression options, use of '--7bit' will save + only a small amount of table space, and make your scanner + considerably less portable. 
'Flex''s default behavior is to + generate an 8-bit scanner unless you use the '-Cf' or '-CF' options, in + which case 'flex' defaults to generating 7-bit scanners unless your + site was already configured to generate 8-bit scanners (as will + often be the case with non-USA sites). You can tell whether flex + generated a 7-bit or an 8-bit scanner by inspecting the flag + summary in the '--verbose' output as described above. + + Note that if you use '-Cfe' or '-CFe', 'flex' still defaults to + generating an 8-bit scanner, since usually with these compression + options full 8-bit tables are not much more expensive than 7-bit + tables. + +'-8, --8bit, '%option 8bit'' + instructs 'flex' to generate an 8-bit scanner, i.e., one which can + recognize 8-bit characters. This flag is only needed for scanners + generated using '-Cf' or '-CF', as otherwise flex defaults to + generating an 8-bit scanner anyway. + + See the discussion of '--7bit' above for 'flex''s default behavior + and the tradeoffs between 7-bit and 8-bit scanners. + +'--default, '%option default'' + generates the default rule. + +'--always-interactive, '%option always-interactive'' + instructs flex to generate a scanner which always considers its + input _interactive_. Normally, on each new input file the scanner + calls 'isatty()' in an attempt to determine whether the scanner's + input source is interactive and thus should be read a character at + a time. When this option is used, however, then no such call is + made. + +'--never-interactive, '%option never-interactive'' + instructs flex to generate a scanner which never considers its + input interactive. This is the opposite of 'always-interactive'. + +'-X, --posix, '%option posix'' + turns on maximum compatibility with the POSIX 1003.2-1992 + definition of 'lex'. Since 'flex' was originally designed to + implement the POSIX definition of 'lex' this generally involves + very few changes in behavior. 
At the current writing the known + differences between 'flex' and the POSIX standard are: + + * In POSIX and AT&T 'lex', the repeat operator, '{}', has lower + precedence than concatenation (thus 'ab{3}' yields 'ababab'). + Most POSIX utilities use an Extended Regular Expression (ERE) + precedence that has the precedence of the repeat operator + higher than concatenation (which causes 'ab{3}' to yield + 'abbb'). By default, 'flex' places the precedence of the + repeat operator higher than concatenation which matches the + ERE processing of other POSIX utilities. When either + '--posix' or '-l' are specified, 'flex' will use the + traditional AT&T and POSIX-compliant precedence for the repeat + operator where concatenation has higher precedence than the + repeat operator. + +'--stack, '%option stack'' + enables the use of start condition stacks (*note Start + Conditions::). + +'--stdinit, '%option stdinit'' + if set (i.e., %option stdinit) initializes 'yyin' and 'yyout' to + 'stdin' and 'stdout', instead of the default of 'NULL'. Some + existing 'lex' programs depend on this behavior, even though it is + not compliant with ANSI C, which does not require 'stdin' and + 'stdout' to be compile-time constant. In a reentrant scanner, + however, this is not a problem since initialization is performed in + 'yylex_init' at runtime. + +'--yylineno, '%option yylineno'' + directs 'flex' to generate a scanner that maintains the number of + the current line read from its input in the global variable + 'yylineno'. This option is implied by '%option lex-compat'. In a + reentrant C scanner, the macro 'yylineno' is accessible regardless + of the value of '%option yylineno', however, its value is not + modified by 'flex' unless '%option yylineno' is enabled. 
+ +'--yywrap, '%option yywrap'' + if unset (i.e., '--noyywrap'), makes the scanner not call + 'yywrap()' upon an end-of-file, but simply assume that there are no + more files to scan (until the user points 'yyin' at a new file and + calls 'yylex()' again). + + +File: flex.info, Node: Code-Level And API Options, Next: Options for Scanner Speed and Size, Prev: Options Affecting Scanner Behavior, Up: Scanner Options + +16.3 Code-Level And API Options +=============================== + +'--ansi-definitions, '%option ansi-definitions'' + Deprecated, ignored + +'--ansi-prototypes, '%option ansi-prototypes'' + Deprecated, ignored + +'--bison-bridge, '%option bison-bridge'' + instructs flex to generate a C scanner that is meant to be called + by a 'GNU bison' parser. The scanner has minor API changes for + 'bison' compatibility. In particular, the declaration of 'yylex' + is modified to take an additional parameter, 'yylval'. *Note Bison + Bridge::. + +'--bison-locations, '%option bison-locations'' + instructs flex that 'GNU bison' '%locations' are being used. This + means 'yylex' will be passed an additional parameter, 'yylloc'. + This option implies '%option bison-bridge'. *Note Bison Bridge::. + +'-L, --noline, '%option noline'' + instructs 'flex' not to generate '#line' directives. Without this + option, 'flex' peppers the generated scanner with '#line' + directives so error messages in the actions will be correctly + located with respect to either the original 'flex' input file (if + the errors are due to code in the input file), or 'lex.yy.c' (if + the errors are 'flex''s fault - you should report these sorts of + errors to the email address given in *note Reporting Bugs::). + +'-R, --reentrant, '%option reentrant'' + instructs flex to generate a reentrant C scanner. The generated + scanner may safely be used in a multi-threaded environment. The + API for a reentrant scanner is different than for a non-reentrant + scanner (*note Reentrant::). 
Because of the API difference between + reentrant and non-reentrant 'flex' scanners, non-reentrant flex + code must be modified before it is suitable for use with this + option. This option is not compatible with the '--c++' option. + + The option '--reentrant' does not affect the performance of the + scanner. + +'-+, --c++, '%option c++'' + specifies that you want flex to generate a C++ scanner class. + *Note Cxx::, for details. + +'--array, '%option array'' + specifies that you want yytext to be an array instead of a char* + +'--pointer, '%option pointer'' + specify that 'yytext' should be a 'char *', not an array. This + default is 'char *'. + +'-PPREFIX, --prefix=PREFIX, '%option prefix="PREFIX"'' + changes the default 'yy' prefix used by 'flex' for all + globally-visible variable and function names to instead be + 'PREFIX'. For example, '--prefix=foo' changes the name of 'yytext' + to 'footext'. It also changes the name of the default output file + from 'lex.yy.c' to 'lex.foo.c'. Here is a partial list of the + names affected: + + yy_create_buffer + yy_delete_buffer + yy_flex_debug + yy_init_buffer + yy_flush_buffer + yy_load_buffer_state + yy_switch_to_buffer + yyin + yyleng + yylex + yylineno + yyout + yyrestart + yytext + yywrap + yyalloc + yyrealloc + yyfree + + (If you are using a C++ scanner, then only 'yywrap' and + 'yyFlexLexer' are affected.) Within your scanner itself, you can + still refer to the global variables and functions using either + version of their name; but externally, they have the modified name. + + This option lets you easily link together multiple 'flex' programs + into the same executable. Note, though, that using this option + also renames 'yywrap()', so you now _must_ either provide your own + (appropriately-named) version of the routine for your scanner, or + use '%option noyywrap', as linking with '-lfl' no longer provides + one for you by default. 
+ +'--main, '%option main'' + directs flex to provide a default 'main()' program for the scanner, + which simply calls 'yylex()'. This option implies 'noyywrap' (see + above). + +'--nounistd, '%option nounistd'' + suppresses inclusion of the non-ANSI header file 'unistd.h'. This + option is meant to target environments in which 'unistd.h' does not + exist. Be aware that certain options may cause flex to generate + code that relies on functions normally found in 'unistd.h' (e.g., + 'isatty()', 'read()'). If you wish to use these functions, you + will have to inform your compiler where to find them. *Note + option-always-interactive::. *Note option-read::. + +'--yyclass=NAME, '%option yyclass="NAME"'' + only applies when generating a C++ scanner (the '--c++' option). + It informs 'flex' that you have derived 'NAME' as a subclass of + 'yyFlexLexer', so 'flex' will place your actions in the member + function 'NAME::yylex()' instead of 'yyFlexLexer::yylex()'. It also + generates a 'yyFlexLexer::yylex()' member function that emits a + run-time error (by invoking 'yyFlexLexer::LexerError()') if called. + *Note Cxx::. + + +File: flex.info, Node: Options for Scanner Speed and Size, Next: Debugging Options, Prev: Code-Level And API Options, Up: Scanner Options + +16.4 Options for Scanner Speed and Size +======================================= + +'-C[aefFmr]' + controls the degree of table compression and, more generally, + trade-offs between small scanners and fast scanners. + + '-C' + A lone '-C' specifies that the scanner tables should be + compressed but neither equivalence classes nor + meta-equivalence classes should be used. + + '-Ca, --align, '%option align'' + ("align") instructs flex to trade off larger tables in the + generated scanner for faster performance because the elements + of the tables are better aligned for memory access and + computation. 
On some RISC architectures, fetching and + manipulating longwords is more efficient than with + smaller-sized units such as shortwords. This option can + quadruple the size of the tables used by your scanner. + + '-Ce, --ecs, '%option ecs'' + directs 'flex' to construct "equivalence classes", i.e., sets + of characters which have identical lexical properties (for + example, if the only appearance of digits in the 'flex' input + is in the character class "[0-9]" then the digits '0', '1', + ..., '9' will all be put in the same equivalence class). + Equivalence classes usually give dramatic reductions in the + final table/object file sizes (typically a factor of 2-5) and + are pretty cheap performance-wise (one array look-up per + character scanned). + + '-Cf' + specifies that the "full" scanner tables should be generated - + 'flex' should not compress the tables by taking advantages of + similar transition functions for different states. + + '-CF' + specifies that the alternate fast scanner representation + (described above under the '--fast' flag) should be used. + This option cannot be used with '--c++'. + + '-Cm, --meta-ecs, '%option meta-ecs'' + directs 'flex' to construct "meta-equivalence classes", which + are sets of equivalence classes (or characters, if equivalence + classes are not being used) that are commonly used together. + Meta-equivalence classes are often a big win when using + compressed tables, but they have a moderate performance impact + (one or two 'if' tests and one array look-up per character + scanned). + + '-Cr, --read, '%option read'' + causes the generated scanner to _bypass_ use of the standard + I/O library ('stdio') for input. Instead of calling 'fread()' + or 'getc()', the scanner will use the 'read()' system call, + resulting in a performance gain which varies from system to + system, but in general is probably negligible unless you are + also using '-Cf' or '-CF'. 
Using '-Cr' can cause strange + behavior if, for example, you read from 'yyin' using 'stdio' + prior to calling the scanner (because the scanner will miss + whatever text your previous reads left in the 'stdio' input + buffer). '-Cr' has no effect if you define 'YY_INPUT()' + (*note Generated Scanner::). + + The options '-Cf' or '-CF' and '-Cm' do not make sense together - + there is no opportunity for meta-equivalence classes if the table + is not being compressed. Otherwise the options may be freely + mixed, and are cumulative. + + The default setting is '-Cem', which specifies that 'flex' should + generate equivalence classes and meta-equivalence classes. This + setting provides the highest degree of table compression. You can + trade off faster-executing scanners at the cost of larger tables + with the following generally being true: + + slowest & smallest + -Cem + -Cm + -Ce + -C + -C{f,F}e + -C{f,F} + -C{f,F}a + fastest & largest + + Note that scanners with the smallest tables are usually generated + and compiled the quickest, so during development you will usually + want to use the default, maximal compression. + + '-Cfe' is often a good compromise between speed and size for + production scanners. + +'-f, --full, '%option full'' + specifies "fast scanner". No table compression is done and 'stdio' + is bypassed. The result is large but fast. This option is + equivalent to '--Cfr' + +'-F, --fast, '%option fast'' + specifies that the _fast_ scanner table representation should be + used (and 'stdio' bypassed). This representation is about as fast + as the full table representation '--full', and for some sets of + patterns will be considerably smaller (and for others, larger). In + general, if the pattern set contains both _keywords_ and a + catch-all, _identifier_ rule, such as in the set: + + "case" return TOK_CASE; + "switch" return TOK_SWITCH; + ... 
+ "default" return TOK_DEFAULT; + [a-z]+ return TOK_ID; + + then you're better off using the full table representation. If + only the _identifier_ rule is present and you then use a hash table + or some such to detect the keywords, you're better off using + '--fast'. + + This option is equivalent to '-CFr'. It cannot be used with + '--c++'. + + +File: flex.info, Node: Debugging Options, Next: Miscellaneous Options, Prev: Options for Scanner Speed and Size, Up: Scanner Options + +16.5 Debugging Options +====================== + +'-b, --backup, '%option backup'' + Generate backing-up information to 'lex.backup'. This is a list of + scanner states which require backing up and the input characters on + which they do so. By adding rules one can remove backing-up + states. If _all_ backing-up states are eliminated and '-Cf' or + '-CF' is used, the generated scanner will run faster (see the + '--perf-report' flag). Only users who wish to squeeze every last + cycle out of their scanners need worry about this option. (*note + Performance::). + +'-d, --debug, '%option debug'' + makes the generated scanner run in "debug" mode. Whenever a + pattern is recognized and the global variable 'yy_flex_debug' is + non-zero (which is the default), the scanner will write to 'stderr' + a line of the form: + + -accepting rule at line 53 ("the matched text") + + The line number refers to the location of the rule in the file + defining the scanner (i.e., the file that was fed to flex). + Messages are also generated when the scanner backs up, accepts the + default rule, reaches the end of its input buffer (or encounters a + NUL; at this point, the two look the same as far as the scanner's + concerned), or reaches an end-of-file. + +'-p, --perf-report, '%option perf-report'' + generates a performance report to 'stderr'. The report consists of + comments regarding features of the 'flex' input file which will + cause a serious loss of performance in the resulting scanner. 
If + you give the flag twice, you will also get comments regarding + features that lead to minor performance losses. + + Note that the use of 'REJECT', and variable trailing context (*note + Limitations::) entails a substantial performance penalty; use of + 'yymore()', the '^' operator, and the '--interactive' flag entail + minor performance penalties. + +'-s, --nodefault, '%option nodefault'' + causes the _default rule_ (that unmatched scanner input is echoed + to 'stdout)' to be suppressed. If the scanner encounters input + that does not match any of its rules, it aborts with an error. + This option is useful for finding holes in a scanner's rule set. + +'-T, --trace, '%option trace'' + makes 'flex' run in "trace" mode. It will generate a lot of + messages to 'stderr' concerning the form of the input and the + resultant non-deterministic and deterministic finite automata. + This option is mostly for use in maintaining 'flex'. + +'-w, --nowarn, '%option nowarn'' + suppresses warning messages. + +'-v, --verbose, '%option verbose'' + specifies that 'flex' should write to 'stderr' a summary of + statistics regarding the scanner it generates. Most of the + statistics are meaningless to the casual 'flex' user, but the first + line identifies the version of 'flex' (same as reported by + '--version'), and the next line the flags used when generating the + scanner, including those that are on by default. + +'--warn, '%option warn'' + warn about certain things. In particular, if the default rule can + be matched but no default rule has been given, the flex will warn + you. We recommend using this option always. + + +File: flex.info, Node: Miscellaneous Options, Prev: Debugging Options, Up: Scanner Options + +16.6 Miscellaneous Options +========================== + +'-c' + A do-nothing option included for POSIX compliance. + +'-h, -?, --help' + generates a "help" summary of 'flex''s options to 'stdout' and then + exits. 
+ +'-n' + Another do-nothing option included for POSIX compliance. + +'-V, --version' + prints the version number to 'stdout' and exits. + + +File: flex.info, Node: Performance, Next: Cxx, Prev: Scanner Options, Up: Top + +17 Performance Considerations +***************************** + +The main design goal of 'flex' is that it generate high-performance +scanners. It has been optimized for dealing well with large sets of +rules. Aside from the effects on scanner speed of the table compression +'-C' options outlined above, there are a number of options/actions which +degrade performance. These are, from most expensive to least: + + REJECT + arbitrary trailing context + + pattern sets that require backing up + %option yylineno + %array + + %option interactive + %option always-interactive + + ^ beginning-of-line operator + yymore() + + with the first two all being quite expensive and the last two being +quite cheap. Note also that 'unput()' is implemented as a routine call +that potentially does quite a bit of work, while 'yyless()' is a +quite-cheap macro. So if you are just putting back some excess text you +scanned, use 'yyless()'. + + 'REJECT' should be avoided at all costs when performance is +important. It is a particularly expensive option. + + There is one case when '%option yylineno' can be expensive. That is +when your patterns match long tokens that could _possibly_ contain a +newline character. There is no performance penalty for rules that can +not possibly match newlines, since flex does not need to check them for +newlines. In general, you should avoid rules such as '[^f]+', which +match very long tokens, including newlines, and may possibly match your +entire file! A better approach is to separate '[^f]+' into two rules: + + %option yylineno + %% + [^f\n]+ + \n+ + + The above scanner does not incur a performance penalty. + + Getting rid of backing up is messy and often may be an enormous +amount of work for a complicated scanner. 
In principle, one begins by
+ + The way to remove the backing up is to add "error" rules: + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + fooba | + foob | + fo { + /* false alarm, not really a keyword */ + return TOK_ID; + } + + Eliminating backing up among a list of keywords can also be done +using a "catch-all" rule: + + %% + foo return TOK_KEYWORD; + foobar return TOK_KEYWORD; + + [a-z]+ return TOK_ID; + + This is usually the best solution when appropriate. + + Backing up messages tend to cascade. With a complicated set of rules +it's not uncommon to get hundreds of messages. If one can decipher +them, though, it often only takes a dozen or so rules to eliminate the +backing up (though it's easy to make a mistake and have an error rule +accidentally match a valid token. A possible future 'flex' feature will +be to automatically add rules to eliminate backing up). + + It's important to keep in mind that you gain the benefits of +eliminating backing up only if you eliminate _every_ instance of backing +up. Leaving just one means you gain nothing. + + _Variable_ trailing context (where both the leading and trailing +parts do not have a fixed length) entails almost the same performance +loss as 'REJECT' (i.e., substantial). So when possible a rule like: + + %% + mouse|rat/(cat|dog) run(); + + is better written: + + %% + mouse/cat|dog run(); + rat/cat|dog run(); + + or as + + %% + mouse|rat/cat run(); + mouse|rat/dog run(); + + Note that here the special '|' action does _not_ provide any savings, +and can even make things worse (*note Limitations::). + + Another area where the user can increase a scanner's performance (and +one that's easier to implement) arises from the fact that the longer the +tokens matched, the faster the scanner will run. 
This is because with +long tokens the processing of most input characters takes place in the +(short) inner scanning loop, and does not often have to go through the +additional work of setting up the scanning environment (e.g., 'yytext') +for the action. Recall the scanner for C comments: + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\n]* + <comment>"*"+[^*/\n]* + <comment>\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + + This could be sped up by writing it as: + + %x comment + %% + int line_num = 1; + + "/*" BEGIN(comment); + + <comment>[^*\n]* + <comment>[^*\n]*\n ++line_num; + <comment>"*"+[^*/\n]* + <comment>"*"+[^*/\n]*\n ++line_num; + <comment>"*"+"/" BEGIN(INITIAL); + + Now instead of each newline requiring the processing of another +action, recognizing the newlines is distributed over the other rules to +keep the matched text as long as possible. Note that _adding_ rules +does _not_ slow down the scanner! The speed of the scanner is +independent of the number of rules or (modulo the considerations given +at the beginning of this section) how complicated the rules are with +regard to operators such as '*' and '|'. + + A final example in speeding up a scanner: suppose you want to scan +through a file containing identifiers and keywords, one per line and +with no other extraneous characters, and recognize all the keywords. A +natural first approach is: + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + .|\n /* it's not a keyword */ + + To eliminate the back-tracking, introduce a catch-all rule: + + %% + asm | + auto | + break | + ... etc ... + volatile | + while /* it's a keyword */ + + [a-z]+ | + .|\n /* it's not a keyword */ + + Now, if it's guaranteed that there's exactly one word per line, then +we can reduce the total number of matches by a half by merging in the +recognition of newlines with that of the other tokens: + + %% + asm\n | + auto\n | + break\n | + ... etc ... 
+ volatile\n | + while\n /* it's a keyword */ + + [a-z]+\n | + .|\n /* it's not a keyword */ + + One has to be careful here, as we have now reintroduced backing up +into the scanner. In particular, while _we_ know that there will never +be any characters in the input stream other than letters or newlines, +'flex' can't figure this out, and it will plan for possibly needing to +back up when it has scanned a token like 'auto' and then the next +character is something other than a newline or a letter. Previously it +would then just match the 'auto' rule and be done, but now it has no +'auto' rule, only a 'auto\n' rule. To eliminate the possibility of +backing up, we could either duplicate all rules but without final +newlines, or, since we never expect to encounter such an input and +therefore don't how it's classified, we can introduce one more catch-all +rule, this one which doesn't include a newline: + + %% + asm\n | + auto\n | + break\n | + ... etc ... + volatile\n | + while\n /* it's a keyword */ + + [a-z]+\n | + [a-z]+ | + .|\n /* it's not a keyword */ + + Compiled with '-Cf', this is about as fast as one can get a 'flex' +scanner to go for this particular problem. + + A final note: 'flex' is slow when matching 'NUL's, particularly when +a token contains multiple 'NUL's. It's best to write rules which match +_short_ amounts of text if it's anticipated that the text will often +include 'NUL's. + + Another final note regarding performance: as mentioned in *note +Matching::, dynamically resizing 'yytext' to accommodate huge tokens is +a slow process because it presently requires that the (huge) token be +rescanned from the beginning. Thus if performance is vital, you should +attempt to match "large" quantities of text but not "huge" quantities, +where the cutoff between the two is at about 8K characters per token. 
+ + +File: flex.info, Node: Cxx, Next: Reentrant, Prev: Performance, Up: Top + +18 Generating C++ Scanners +************************** + +*IMPORTANT*: the present form of the scanning class is _experimental_ +and may change considerably between major releases. + + 'flex' provides two different ways to generate scanners for use with +C++. The first way is to simply compile a scanner generated by 'flex' +using a C++ compiler instead of a C compiler. You should not encounter +any compilation errors (*note Reporting Bugs::). You can then use C++ +code in your rule actions instead of C code. Note that the default +input source for your scanner remains 'yyin', and default echoing is +still done to 'yyout'. Both of these remain 'FILE *' variables and not +C++ _streams_. + + You can also use 'flex' to generate a C++ scanner class, using the +'-+' option (or, equivalently, '%option c++)', which is automatically +specified if the name of the 'flex' executable ends in a '+', such as +'flex++'. When using this option, 'flex' defaults to generating the +scanner to the file 'lex.yy.cc' instead of 'lex.yy.c'. The generated +scanner includes the header file 'FlexLexer.h', which defines the +interface to two C++ classes. + + The first class in 'FlexLexer.h', 'FlexLexer', provides an abstract +base class defining the general scanner class interface. It provides +the following member functions: + +'const char* YYText()' + returns the text of the most recently matched token, the equivalent + of 'yytext'. + +'int YYLeng()' + returns the length of the most recently matched token, the + equivalent of 'yyleng'. + +'int lineno() const' + returns the current input line number (see '%option yylineno)', or + '1' if '%option yylineno' was not used. + +'void set_debug( int flag )' + sets the debugging flag for the scanner, equivalent to assigning to + 'yy_flex_debug' (*note Scanner Options::). Note that you must + build the scanner using '%option debug' to include debugging + information in it. 
+ +'int debug() const' + returns the current setting of the debugging flag. + + Also provided are member functions equivalent to +'yy_switch_to_buffer()', 'yy_create_buffer()' (though the first argument +is an 'istream&' object reference and not a 'FILE*)', +'yy_flush_buffer()', 'yy_delete_buffer()', and 'yyrestart()' (again, the +first argument is a 'istream&' object reference). + + The second class defined in 'FlexLexer.h' is 'yyFlexLexer', which is +derived from 'FlexLexer'. It defines the following additional member +functions: + +'yyFlexLexer( istream* arg_yyin = 0, ostream* arg_yyout = 0 )' +'yyFlexLexer( istream& arg_yyin, ostream& arg_yyout )' + constructs a 'yyFlexLexer' object using the given streams for input + and output. If not specified, the streams default to 'cin' and + 'cout', respectively. 'yyFlexLexer' does not take ownership of its + stream arguments. It's up to the user to ensure the streams + pointed to remain alive at least as long as the 'yyFlexLexer' + instance. + +'virtual int yylex()' + performs the same role is 'yylex()' does for ordinary 'flex' + scanners: it scans the input stream, consuming tokens, until a + rule's action returns a value. If you derive a subclass 'S' from + 'yyFlexLexer' and want to access the member functions and variables + of 'S' inside 'yylex()', then you need to use '%option yyclass="S"' + to inform 'flex' that you will be using that subclass instead of + 'yyFlexLexer'. In this case, rather than generating + 'yyFlexLexer::yylex()', 'flex' generates 'S::yylex()' (and also + generates a dummy 'yyFlexLexer::yylex()' that calls + 'yyFlexLexer::LexerError()' if called). + +'virtual void switch_streams(istream* new_in = 0, ostream* new_out = 0)' +'virtual void switch_streams(istream& new_in, ostream& new_out)' + reassigns 'yyin' to 'new_in' (if non-null) and 'yyout' to 'new_out' + (if non-null), deleting the previous input buffer if 'yyin' is + reassigned. 
+ +'int yylex( istream* new_in, ostream* new_out = 0 )' +'int yylex( istream& new_in, ostream& new_out )' + first switches the input streams via 'switch_streams( new_in, + new_out )' and then returns the value of 'yylex()'. + + In addition, 'yyFlexLexer' defines the following protected virtual +functions which you can redefine in derived classes to tailor the +scanner: + +'virtual int LexerInput( char* buf, int max_size )' + reads up to 'max_size' characters into 'buf' and returns the number + of characters read. To indicate end-of-input, return 0 characters. + Note that 'interactive' scanners (see the '-B' and '-I' flags in + *note Scanner Options::) define the macro 'YY_INTERACTIVE'. If you + redefine 'LexerInput()' and need to take different actions + depending on whether or not the scanner might be scanning an + interactive input source, you can test for the presence of this + name via '#ifdef' statements. + +'virtual void LexerOutput( const char* buf, int size )' + writes out 'size' characters from the buffer 'buf', which, while + 'NUL'-terminated, may also contain internal 'NUL's if the scanner's + rules can match text with 'NUL's in them. + +'virtual void LexerError( const char* msg )' + reports a fatal error message. The default version of this + function writes the message to the stream 'cerr' and exits. + + Note that a 'yyFlexLexer' object contains its _entire_ scanning +state. Thus you can use such objects to create reentrant scanners, but +see also *note Reentrant::. You can instantiate multiple instances of +the same 'yyFlexLexer' class, and you can also combine multiple C++ +scanner classes together in the same program using the '-P' option +discussed above. + + Finally, note that the '%array' feature is not available to C++ +scanner classes; you must use '%pointer' (the default). + + Here is an example of a simple C++ scanner: + + // An example of using the flex C++ scanner class. 
+ + %{ + #include <iostream> + using namespace std; + int mylineno = 0; + %} + + %option noyywrap c++ + + string \"[^\n"]+\" + + ws [ \t]+ + + alpha [A-Za-z] + dig [0-9] + name ({alpha}|{dig}|\$)({alpha}|{dig}|[_.\-/$])* + num1 [-+]?{dig}+\.?([eE][-+]?{dig}+)? + num2 [-+]?{dig}*\.{dig}+([eE][-+]?{dig}+)? + number {num1}|{num2} + + %% + + {ws} /* skip blanks and tabs */ + + "/*" { + int c; + + while((c = yyinput()) != 0) + { + if(c == '\n') + ++mylineno; + + else if(c == '*') + { + if((c = yyinput()) == '/') + break; + else + unput(c); + } + } + } + + {number} cout << "number " << YYText() << '\n'; + + \n mylineno++; + + {name} cout << "name " << YYText() << '\n'; + + {string} cout << "string " << YYText() << '\n'; + + %% + + // This include is required if main() is an another source file. + //#include <FlexLexer.h> + + int main( int /* argc */, char** /* argv */ ) + { + FlexLexer* lexer = new yyFlexLexer; + while(lexer->yylex() != 0) + ; + return 0; + } + + If you want to create multiple (different) lexer classes, you use the +'-P' flag (or the 'prefix=' option) to rename each 'yyFlexLexer' to some +other 'xxFlexLexer'. You then can include '<FlexLexer.h>' in your other +sources once per lexer class, first renaming 'yyFlexLexer' as follows: + + #undef yyFlexLexer + #define yyFlexLexer xxFlexLexer + #include <FlexLexer.h> + + #undef yyFlexLexer + #define yyFlexLexer zzFlexLexer + #include <FlexLexer.h> + + if, for example, you used '%option prefix="xx"' for one of your +scanners and '%option prefix="zz"' for the other. + + +File: flex.info, Node: Reentrant, Next: Lex and Posix, Prev: Cxx, Up: Top + +19 Reentrant C Scanners +*********************** + +'flex' has the ability to generate a reentrant C scanner. This is +accomplished by specifying '%option reentrant' ('-R') The generated +scanner is both portable, and safe to use in one or more separate +threads of control. The most common use for reentrant scanners is from +within multi-threaded applications. 
Any thread may create and execute a +reentrant 'flex' scanner without the need for synchronization with other +threads. + +* Menu: + +* Reentrant Uses:: +* Reentrant Overview:: +* Reentrant Example:: +* Reentrant Detail:: +* Reentrant Functions:: + + +File: flex.info, Node: Reentrant Uses, Next: Reentrant Overview, Prev: Reentrant, Up: Reentrant + +19.1 Uses for Reentrant Scanners +================================ + +However, there are other uses for a reentrant scanner. For example, you +could scan two or more files simultaneously to implement a 'diff' at the +token level (i.e., instead of at the character level): + + /* Example of maintaining more than one active scanner. */ + + do { + int tok1, tok2; + + tok1 = yylex( scanner_1 ); + tok2 = yylex( scanner_2 ); + + if( tok1 != tok2 ) + printf("Files are different."); + + } while ( tok1 && tok2 ); + + Another use for a reentrant scanner is recursion. (Note that a +recursive scanner can also be created using a non-reentrant scanner and +buffer states. *Note Multiple Input Buffers::.) + + The following crude scanner supports the 'eval' command by invoking +another instance of itself. + + /* Example of recursive invocation. */ + + %option reentrant + + %% + "eval(".+")" { + yyscan_t scanner; + YY_BUFFER_STATE buf; + + yylex_init( &scanner ); + yytext[yyleng-1] = ' '; + + buf = yy_scan_string( yytext + 5, scanner ); + yylex( scanner ); + + yy_delete_buffer(buf,scanner); + yylex_destroy( scanner ); + } + ... + %% + + +File: flex.info, Node: Reentrant Overview, Next: Reentrant Example, Prev: Reentrant Uses, Up: Reentrant + +19.2 An Overview of the Reentrant API +===================================== + +The API for reentrant scanners is different than for non-reentrant +scanners. Here is a quick overview of the API: + + '%option reentrant' must be specified. + + * All functions take one additional argument: 'yyscanner' + + * All global variables are replaced by their macro equivalents. 
(We + tell you this because it may be important to you during debugging.) + + * 'yylex_init' and 'yylex_destroy' must be called before and after + 'yylex', respectively. + + * Accessor methods (get/set functions) provide access to common + 'flex' variables. + + * User-specific data can be stored in 'yyextra'. + + +File: flex.info, Node: Reentrant Example, Next: Reentrant Detail, Prev: Reentrant Overview, Up: Reentrant + +19.3 Reentrant Example +====================== + +First, an example of a reentrant scanner: + /* This scanner prints "//" comments. */ + + %option reentrant stack noyywrap + %x COMMENT + + %% + + "//" yy_push_state( COMMENT, yyscanner); + .|\n + + <COMMENT>\n yy_pop_state( yyscanner ); + <COMMENT>[^\n]+ fprintf( yyout, "%s\n", yytext); + + %% + + int main ( int argc, char * argv[] ) + { + yyscan_t scanner; + + yylex_init ( &scanner ); + yylex ( scanner ); + yylex_destroy ( scanner ); + return 0; + } + + +File: flex.info, Node: Reentrant Detail, Next: Reentrant Functions, Prev: Reentrant Example, Up: Reentrant + +19.4 The Reentrant API in Detail +================================ + +Here are the things you need to do or know to use the reentrant C API of +'flex'. + +* Menu: + +* Specify Reentrant:: +* Extra Reentrant Argument:: +* Global Replacement:: +* Init and Destroy Functions:: +* Accessor Methods:: +* Extra Data:: +* About yyscan_t:: + + +File: flex.info, Node: Specify Reentrant, Next: Extra Reentrant Argument, Prev: Reentrant Detail, Up: Reentrant Detail + +19.4.1 Declaring a Scanner As Reentrant +--------------------------------------- + +%option reentrant (-reentrant) must be specified. + + Notice that '%option reentrant' is specified in the above example +(*note Reentrant Example::. Had this option not been specified, 'flex' +would have happily generated a non-reentrant scanner without +complaining. You may explicitly specify '%option noreentrant', if you +do _not_ want a reentrant scanner, although it is not necessary. 
The +default is to generate a non-reentrant scanner. + + +File: flex.info, Node: Extra Reentrant Argument, Next: Global Replacement, Prev: Specify Reentrant, Up: Reentrant Detail + +19.4.2 The Extra Argument +------------------------- + +All functions take one additional argument: 'yyscanner'. + + Notice that the calls to 'yy_push_state' and 'yy_pop_state' both have +an argument, 'yyscanner' , that is not present in a non-reentrant +scanner. Here are the declarations of 'yy_push_state' and +'yy_pop_state' in the reentrant scanner: + + static void yy_push_state ( int new_state , yyscan_t yyscanner ) ; + static void yy_pop_state ( yyscan_t yyscanner ) ; + + Notice that the argument 'yyscanner' appears in the declaration of +both functions. In fact, all 'flex' functions in a reentrant scanner +have this additional argument. It is always the last argument in the +argument list, it is always of type 'yyscan_t' (which is typedef'd to +'void *') and it is always named 'yyscanner'. As you may have guessed, +'yyscanner' is a pointer to an opaque data structure encapsulating the +current state of the scanner. For a list of function declarations, see +*note Reentrant Functions::. Note that preprocessor macros, such as +'BEGIN', 'ECHO', and 'REJECT', do not take this additional argument. + + +File: flex.info, Node: Global Replacement, Next: Init and Destroy Functions, Prev: Extra Reentrant Argument, Up: Reentrant Detail + +19.4.3 Global Variables Replaced By Macros +------------------------------------------ + +All global variables in traditional flex have been replaced by macro +equivalents. + + Note that in the above example, 'yyout' and 'yytext' are not plain +variables. These are macros that will expand to their equivalent +lvalue. All of the familiar 'flex' globals have been replaced by their +macro equivalents. In particular, 'yytext', 'yyleng', 'yylineno', +'yyin', 'yyout', 'yyextra', 'yylval', and 'yylloc' are macros. 
You may +safely use these macros in actions as if they were plain variables. We +only tell you this so you don't expect to link to these variables +externally. Currently, each macro expands to a member of an internal +struct, e.g., + + #define yytext (((struct yyguts_t*)yyscanner)->yytext_r) + + One important thing to remember about 'yytext' and friends is that +'yytext' is not a global variable in a reentrant scanner, you can not +access it directly from outside an action or from other functions. You +must use an accessor method, e.g., 'yyget_text', to accomplish this. +(See below). + + +File: flex.info, Node: Init and Destroy Functions, Next: Accessor Methods, Prev: Global Replacement, Up: Reentrant Detail + +19.4.4 Init and Destroy Functions +--------------------------------- + +'yylex_init' and 'yylex_destroy' must be called before and after +'yylex', respectively. + + int yylex_init ( yyscan_t * ptr_yy_globals ) ; + int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t * ptr_yy_globals ) ; + int yylex ( yyscan_t yyscanner ) ; + int yylex_destroy ( yyscan_t yyscanner ) ; + + The function 'yylex_init' must be called before calling any other +function. The argument to 'yylex_init' is the address of an +uninitialized pointer to be filled in by 'yylex_init', overwriting any +previous contents. The function 'yylex_init_extra' may be used instead, +taking as its first argument a variable of type 'YY_EXTRA_TYPE'. See +the section on yyextra, below, for more details. + + The value stored in 'ptr_yy_globals' should thereafter be passed to +'yylex' and 'yylex_destroy'. Flex does not save the argument passed to +'yylex_init', so it is safe to pass the address of a local pointer to +'yylex_init' so long as it remains in scope for the duration of all +calls to the scanner, up to and including the call to 'yylex_destroy'. + + The function 'yylex' should be familiar to you by now. 
The reentrant +version takes one argument, which is the value returned (via an +argument) by 'yylex_init'. Otherwise, it behaves the same as the +non-reentrant version of 'yylex'. + + Both 'yylex_init' and 'yylex_init_extra' returns 0 (zero) on success, +or non-zero on failure, in which case errno is set to one of the +following values: + + * ENOMEM Memory allocation error. *Note memory-management::. + * EINVAL Invalid argument. + + The function 'yylex_destroy' should be called to free resources used +by the scanner. After 'yylex_destroy' is called, the contents of +'yyscanner' should not be used. Of course, there is no need to destroy +a scanner if you plan to reuse it. A 'flex' scanner (both reentrant and +non-reentrant) may be restarted by calling 'yyrestart'. + + Below is an example of a program that creates a scanner, uses it, +then destroys it when done: + + int main () + { + yyscan_t scanner; + int tok; + + yylex_init(&scanner); + + while ((tok=yylex(scanner)) > 0) + printf("tok=%d yytext=%s\n", tok, yyget_text(scanner)); + + yylex_destroy(scanner); + return 0; + } + + +File: flex.info, Node: Accessor Methods, Next: Extra Data, Prev: Init and Destroy Functions, Up: Reentrant Detail + +19.4.5 Accessing Variables with Reentrant Scanners +-------------------------------------------------- + +Accessor methods (get/set functions) provide access to common 'flex' +variables. + + Many scanners that you build will be part of a larger project. +Portions of your project will need access to 'flex' values, such as +'yytext'. In a non-reentrant scanner, these values are global, so there +is no problem accessing them. However, in a reentrant scanner, there +are no global 'flex' values. You can not access them directly. +Instead, you must access 'flex' values using accessor methods (get/set +functions). Each accessor method is named 'yyget_NAME' or 'yyset_NAME', +where 'NAME' is the name of the 'flex' variable you want. 
For example: + + /* Set the last character of yytext to NULL. */ + void chop ( yyscan_t scanner ) + { + int len = yyget_leng( scanner ); + yyget_text( scanner )[len - 1] = '\0'; + } + + The above code may be called from within an action like this: + + %% + .+\n { chop( yyscanner );} + + You may find that '%option header-file' is particularly useful for +generating prototypes of all the accessor functions. *Note +option-header::. + + +File: flex.info, Node: Extra Data, Next: About yyscan_t, Prev: Accessor Methods, Up: Reentrant Detail + +19.4.6 Extra Data +----------------- + +User-specific data can be stored in 'yyextra'. + + In a reentrant scanner, it is unwise to use global variables to +communicate with or maintain state between different pieces of your +program. However, you may need access to external data or invoke +external functions from within the scanner actions. Likewise, you may +need to pass information to your scanner (e.g., open file descriptors, +or database connections). In a non-reentrant scanner, the only way to +do this would be through the use of global variables. 'Flex' allows you +to store arbitrary, "extra" data in a scanner. This data is accessible +through the accessor methods 'yyget_extra' and 'yyset_extra' from +outside the scanner, and through the shortcut macro 'yyextra' from +within the scanner itself. They are defined as follows: + + #define YY_EXTRA_TYPE void* + YY_EXTRA_TYPE yyget_extra ( yyscan_t scanner ); + void yyset_extra ( YY_EXTRA_TYPE arbitrary_data , yyscan_t scanner); + + In addition, an extra form of 'yylex_init' is provided, +'yylex_init_extra'. This function is provided so that the yyextra value +can be accessed from within the very first yyalloc, used to allocate the +scanner itself. + + By default, 'YY_EXTRA_TYPE' is defined as type 'void *'. You may +redefine this type using '%option extra-type="your_type"' in the +scanner: + + /* An example of overriding YY_EXTRA_TYPE. 
*/ + %{ + #include <sys/stat.h> + #include <unistd.h> + %} + %option reentrant + %option extra-type="struct stat *" + %% + + __filesize__ printf( "%ld", yyextra->st_size ); + __lastmod__ printf( "%ld", yyextra->st_mtime ); + %% + void scan_file( char* filename ) + { + yyscan_t scanner; + struct stat buf; + FILE *in; + + in = fopen( filename, "r" ); + stat( filename, &buf ); + + yylex_init_extra( &buf, &scanner ); + yyset_in( in, scanner ); + yylex( scanner ); + yylex_destroy( scanner ); + + fclose( in ); + } + + +File: flex.info, Node: About yyscan_t, Prev: Extra Data, Up: Reentrant Detail + +19.4.7 About yyscan_t +--------------------- + +'yyscan_t' is defined as: + + typedef void* yyscan_t; + + It is initialized by 'yylex_init()' to point to an internal +structure. You should never access this value directly. In particular, +you should never attempt to free it (use 'yylex_destroy()' instead.) + + +File: flex.info, Node: Reentrant Functions, Prev: Reentrant Detail, Up: Reentrant + +19.5 Functions and Macros Available in Reentrant C Scanners +=========================================================== + +The following functions are available in a reentrant scanner: + + char *yyget_text ( yyscan_t scanner ); + int yyget_leng ( yyscan_t scanner ); + FILE *yyget_in ( yyscan_t scanner ); + FILE *yyget_out ( yyscan_t scanner ); + int yyget_lineno ( yyscan_t scanner ); + YY_EXTRA_TYPE yyget_extra ( yyscan_t scanner ); + int yyget_debug ( yyscan_t scanner ); + + void yyset_debug ( int flag, yyscan_t scanner ); + void yyset_in ( FILE * in_str , yyscan_t scanner ); + void yyset_out ( FILE * out_str , yyscan_t scanner ); + void yyset_lineno ( int line_number , yyscan_t scanner ); + void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t scanner ); + + There are no "set" functions for yytext and yyleng. This is +intentional.
+ + The following macro shortcuts are available in actions in a reentrant +scanner: + + yytext + yyleng + yyin + yyout + yylineno + yyextra + yy_flex_debug + + In a reentrant C scanner, support for yylineno is always present +(i.e., you may access yylineno), but the value is never modified by +'flex' unless '%option yylineno' is enabled. This is to allow the user +to maintain the line count independently of 'flex'. + + The following functions and macros are made available when '%option +bison-bridge' ('--bison-bridge') is specified: + + YYSTYPE * yyget_lval ( yyscan_t scanner ); + void yyset_lval ( YYSTYPE * yylvalp , yyscan_t scanner ); + yylval + + The following functions and macros are made available when '%option +bison-locations' ('--bison-locations') is specified: + + YYLTYPE *yyget_lloc ( yyscan_t scanner ); + void yyset_lloc ( YYLTYPE * yyllocp , yyscan_t scanner ); + yylloc + + Support for yylval assumes that 'YYSTYPE' is a valid type. Support +for yylloc assumes that 'YYLTYPE' is a valid type. Typically, these +types are generated by 'bison', and are included in section 1 of the +'flex' input. + + +File: flex.info, Node: Lex and Posix, Next: Memory Management, Prev: Reentrant, Up: Top + +20 Incompatibilities with Lex and Posix +*************************************** + +'flex' is a rewrite of the AT&T Unix _lex_ tool (the two implementations +do not share any code, though), with some extensions and +incompatibilities, both of which are of concern to those who wish to +write scanners acceptable to both implementations. 'flex' is fully +compliant with the POSIX 'lex' specification, except that when using +'%pointer' (the default), a call to 'unput()' destroys the contents of +'yytext', which is counter to the POSIX specification. In this section +we discuss all of the known areas of incompatibility between 'flex', +AT&T 'lex', and the POSIX specification.
'flex''s '-l' option turns on +maximum compatibility with the original AT&T 'lex' implementation, at +the cost of a major loss in the generated scanner's performance. We +note below which incompatibilities can be overcome using the '-l' +option. 'flex' is fully compatible with 'lex' with the following +exceptions: + + * The undocumented 'lex' scanner internal variable 'yylineno' is not + supported unless '-l' or '%option yylineno' is used. + + * 'yylineno' should be maintained on a per-buffer basis, rather than + a per-scanner (single global variable) basis. + + * 'yylineno' is not part of the POSIX specification. + + * The 'input()' routine is not redefinable, though it may be called + to read characters following whatever has been matched by a rule. + If 'input()' encounters an end-of-file the normal 'yywrap()' + processing is done. A "real" end-of-file is returned by 'input()' + as 'EOF'. + + * Input is instead controlled by defining the 'YY_INPUT()' macro. + + * The 'flex' restriction that 'input()' cannot be redefined is in + accordance with the POSIX specification, which simply does not + specify any way of controlling the scanner's input other than by + making an initial assignment to 'yyin'. + + * The 'unput()' routine is not redefinable. This restriction is in + accordance with POSIX. + + * 'flex' scanners are not as reentrant as 'lex' scanners. In + particular, if you have an interactive scanner and an interrupt + handler which long-jumps out of the scanner, and the scanner is + subsequently called again, you may get the following message: + + fatal flex scanner internal error--end of buffer missed + + To reenter the scanner, first use: + + yyrestart( yyin ); + + Note that this call will throw away any buffered input; usually + this isn't a problem with an interactive scanner. *Note + Reentrant::, for 'flex''s reentrant API. 
+ + * Also note that 'flex' C++ scanner classes _are_ reentrant, so if + using C++ is an option for you, you should use them instead. *Note + Cxx::, and *note Reentrant:: for details. + + * 'output()' is not supported. Output from the ECHO macro is done to + the file-pointer 'yyout' (default 'stdout'). + + * 'output()' is not part of the POSIX specification. + + * 'lex' does not support exclusive start conditions (%x), though they + are in the POSIX specification. + + * When definitions are expanded, 'flex' encloses them in parentheses. + With 'lex', the following: + + NAME [A-Z][A-Z0-9]* + %% + foo{NAME}? printf( "Found it\n" ); + %% + + will not match the string 'foo' because when the macro is expanded + the rule is equivalent to 'foo[A-Z][A-Z0-9]*?' and the precedence + is such that the '?' is associated with '[A-Z0-9]*'. With 'flex', + the rule will be expanded to 'foo([A-Z][A-Z0-9]*)?' and so the + string 'foo' will match. + + * Note that if the definition begins with '^' or ends with '$' then + it is _not_ expanded with parentheses, to allow these operators to + appear in definitions without losing their special meanings. But + the '<s>', '/', and '<<EOF>>' operators cannot be used in a 'flex' + definition. + + * Using '-l' results in the 'lex' behavior of no parentheses around + the definition. + + * The POSIX specification is that the definition be enclosed in + parentheses. + + * Some implementations of 'lex' allow a rule's action to begin on a + separate line, if the rule's pattern has trailing whitespace: + + %% + foo|bar<space here> + { foobar_action();} + + 'flex' does not support this feature. + + * The 'lex' '%r' (generate a Ratfor scanner) option is not supported. + It is not part of the POSIX specification. + + * After a call to 'unput()', _yytext_ is undefined until the next + token is matched, unless the scanner was built using '%array'. + This is not the case with 'lex' or the POSIX specification.
The + '-l' option does away with this incompatibility. + + * The precedence of the '{,}' (numeric range) operator is different. + The AT&T and POSIX specifications of 'lex' interpret 'abc{1,3}' as + "match one, two, or three occurrences of 'abc'", whereas 'flex' + interprets it as "match 'ab' followed by one, two, or three + occurrences of 'c'". The '-l' and '--posix' options do away with + this incompatibility. + + * The precedence of the '^' operator is different. 'lex' interprets + '^foo|bar' as "match either 'foo' at the beginning of a line, or + 'bar' anywhere", whereas 'flex' interprets it as "match either + 'foo' or 'bar' if they come at the beginning of a line". The + latter is in agreement with the POSIX specification. + + * The special table-size declarations such as '%a' supported by 'lex' + are not required by 'flex' scanners. 'flex' ignores them. + + * The name 'FLEX_SCANNER' is '#define''d so scanners may be written + for use with either 'flex' or 'lex'. Scanners also include + 'YY_FLEX_MAJOR_VERSION', 'YY_FLEX_MINOR_VERSION' and + 'YY_FLEX_SUBMINOR_VERSION' indicating which version of 'flex' + generated the scanner. For example, for the 2.5.22 release, these + defines would be 2, 5 and 22 respectively. If the version of + 'flex' being used is a beta version, then the symbol 'FLEX_BETA' is + defined. + + * The symbols '[[' and ']]' in the code sections of the input may + conflict with the m4 delimiters. *Note M4 Dependency::.
+ + The following 'flex' features are not included in 'lex' or the POSIX +specification: + + * C++ scanners + * %option + * start condition scopes + * start condition stacks + * interactive/non-interactive scanners + * yy_scan_string() and friends + * yyterminate() + * yy_set_interactive() + * yy_set_bol() + * YY_AT_BOL() + * <<EOF>> + * <*> + * YY_DECL + * YY_START + * YY_USER_ACTION + * YY_USER_INIT + * #line directives + * %{}'s around actions + * reentrant C API + * multiple actions on a line + * almost all of the 'flex' command-line options + + The feature "multiple actions on a line" refers to the fact that with +'flex' you can put multiple actions on the same line, separated with +semi-colons, while with 'lex', the following: + + foo handle_foo(); ++num_foos_seen; + + is (rather surprisingly) truncated to + + foo handle_foo(); + + 'flex' does not truncate the action. Actions that are not enclosed +in braces are simply terminated at the end of the line. + + +File: flex.info, Node: Memory Management, Next: Serialized Tables, Prev: Lex and Posix, Up: Top + +21 Memory Management +******************** + +This chapter describes how flex handles dynamic memory, and how you can +override the default behavior. + +* Menu: + +* The Default Memory Management:: +* Overriding The Default Memory Management:: +* A Note About yytext And Memory:: + + +File: flex.info, Node: The Default Memory Management, Next: Overriding The Default Memory Management, Prev: Memory Management, Up: Memory Management + +21.1 The Default Memory Management +================================== + +Flex allocates dynamic memory during initialization, and once in a while +from within a call to yylex(). Initialization takes place during the +first call to yylex(). Thereafter, flex may reallocate more memory if +it needs to enlarge a buffer. As of version 2.5.9 Flex will clean up +all memory when you call 'yylex_destroy'. *Note faq-memory-leak::.
+ + Flex allocates dynamic memory for five purposes, listed below (1) + +16kB for the input buffer. + Flex allocates memory for the character buffer used to perform + pattern matching. Flex must read ahead from the input stream and + store it in a large character buffer. This buffer is typically the + largest chunk of dynamic memory flex consumes. This buffer will + grow if necessary, doubling the size each time. Flex frees this + memory when you call yylex_destroy(). The default size of this + buffer (16384 bytes) is almost always too large. The ideal size + for this buffer is the length of the longest token expected, in + bytes, plus a little more. Flex will allocate a few extra bytes + for housekeeping. Currently, to override the size of the input + buffer you must '#define YY_BUF_SIZE' to whatever number of bytes + you want. We don't plan to change this in the near future, but we + reserve the right to do so if we ever add a more robust memory + management API. + +64kB for the REJECT state. This will only be allocated if you use REJECT. + The size is large enough to hold the same number of states as + characters in the input buffer. If you override the size of the + input buffer (via 'YY_BUF_SIZE'), then you automatically override + the size of this buffer as well. + +100 bytes for the start condition stack. + Flex allocates memory for the start condition stack. This is the + stack used for pushing start states, i.e., with yy_push_state(). + It will grow if necessary. Since the states are simply integers, + this stack doesn't consume much memory. This stack is not present + if '%option stack' is not specified. You will rarely need to tune + this buffer. The ideal size for this stack is the maximum depth + expected. The memory for this stack is automatically destroyed + when you call yylex_destroy(). *Note option-stack::. + +40 bytes for each YY_BUFFER_STATE. + Flex allocates memory for each YY_BUFFER_STATE.
The buffer state + itself is about 40 bytes, plus an additional large character buffer + (described above.) The initial buffer state is created during + initialization, and with each call to yy_create_buffer(). You + can't tune the size of this, but you can tune the character buffer + as described above. Any buffer state that you explicitly create by + calling yy_create_buffer() is _NOT_ destroyed automatically. You + must call yy_delete_buffer() to free the memory. The exception to + this rule is that flex will delete the current buffer automatically + when you call yylex_destroy(). If you delete the current buffer, + be sure to set it to NULL. That way, flex will not try to delete + the buffer a second time (possibly crashing your program!) At the + time of this writing, flex does not provide a growable stack for + the buffer states. You have to manage that yourself. *Note + Multiple Input Buffers::. + +84 bytes for the reentrant scanner guts + Flex allocates about 84 bytes for the reentrant scanner structure + when you call yylex_init(). It is destroyed when the user calls + yylex_destroy(). + + ---------- Footnotes ---------- + + (1) The quantities given here are approximate, and may vary due to +host architecture, compiler configuration, or due to future enhancements +to flex. + + +File: flex.info, Node: Overriding The Default Memory Management, Next: A Note About yytext And Memory, Prev: The Default Memory Management, Up: Memory Management + +21.2 Overriding The Default Memory Management +============================================= + +Flex calls the functions 'yyalloc', 'yyrealloc', and 'yyfree' when it +needs to allocate or free memory. By default, these functions are +wrappers around the standard C functions, 'malloc', 'realloc', and +'free', respectively. You can override the default implementations by +telling flex that you will provide your own implementations. + + To override the default implementations, you must do two things: + + 1. 
Suppress the default implementations by specifying one or more of + the following options: + + * '%option noyyalloc' + * '%option noyyrealloc' + * '%option noyyfree'. + + 2. Provide your own implementation of the following functions: (1) + + // For a non-reentrant scanner + void * yyalloc (size_t bytes); + void * yyrealloc (void * ptr, size_t bytes); + void yyfree (void * ptr); + + // For a reentrant scanner + void * yyalloc (size_t bytes, void * yyscanner); + void * yyrealloc (void * ptr, size_t bytes, void * yyscanner); + void yyfree (void * ptr, void * yyscanner); + + In the following example, we will override all three memory routines. +We assume that there is a custom allocator with garbage collection. In +order to make this example interesting, we will use a reentrant scanner, +passing a pointer to the custom allocator through 'yyextra'. + + %{ + #include "some_allocator.h" + %} + + /* Suppress the default implementations. */ + %option noyyalloc noyyrealloc noyyfree + %option reentrant + + /* Initialize the allocator. */ + %{ + #define YY_EXTRA_TYPE struct allocator* + #define YY_USER_INIT yyextra = allocator_create(); + %} + + %% + .|\n ; + %% + + /* Provide our own implementations. */ + void * yyalloc (size_t bytes, void* yyscanner) { + return allocator_alloc (yyextra, bytes); + } + + void * yyrealloc (void * ptr, size_t bytes, void* yyscanner) { + return allocator_realloc (yyextra, bytes); + } + + void yyfree (void * ptr, void * yyscanner) { + /* Do nothing -- we leave it to the garbage collector. */ + } + + + ---------- Footnotes ---------- + + (1) It is not necessary to override all (or any) of the memory +management routines. You may, for example, override 'yyrealloc', but +not 'yyfree' or 'yyalloc'. 
+ + +File: flex.info, Node: A Note About yytext And Memory, Prev: Overriding The Default Memory Management, Up: Memory Management + +21.3 A Note About yytext And Memory +=================================== + +When flex finds a match, 'yytext' points to the first character of the +match in the input buffer. The string itself is part of the input +buffer, and is _NOT_ allocated separately. The value of yytext will be +overwritten the next time yylex() is called. In short, the value of +yytext is only valid from within the matched rule's action. + + Often, you want the value of yytext to persist for later processing, +i.e., by a parser with non-zero lookahead. In order to preserve yytext, +you will have to copy it with strdup() or a similar function. But this +introduces some headache because your parser is now responsible for +freeing the copy of yytext. If you use a yacc or bison parser, +(commonly used with flex), you will discover that the error recovery +mechanisms can cause memory to be leaked. + + To prevent memory leaks from strdup'd yytext, you will have to track +the memory somehow. Our experience has shown that a garbage collection +mechanism or a pooled memory mechanism will save you a lot of grief when +writing parsers. + + +File: flex.info, Node: Serialized Tables, Next: Diagnostics, Prev: Memory Management, Up: Top + +22 Serialized Tables +******************** + +A 'flex' scanner has the ability to save the DFA tables to a file, and +load them at runtime when needed. The motivation for this feature is to +reduce the runtime memory footprint. Traditionally, these tables have +been compiled into the scanner as C arrays, and are sometimes quite +large. Since the tables are compiled into the scanner, the memory used +by the tables can never be freed. This is a waste of memory, especially +if an application uses several scanners, but none of them at the same +time. 
+ + The serialization feature allows the tables to be loaded at runtime, +before scanning begins. The tables may be discarded when scanning is +finished. + +* Menu: + +* Creating Serialized Tables:: +* Loading and Unloading Serialized Tables:: +* Tables File Format:: + + +File: flex.info, Node: Creating Serialized Tables, Next: Loading and Unloading Serialized Tables, Prev: Serialized Tables, Up: Serialized Tables + +22.1 Creating Serialized Tables +=============================== + +You may create a scanner with serialized tables by specifying: + + %option tables-file=FILE + or + --tables-file=FILE + + These options instruct flex to save the DFA tables to the file FILE. +The tables will _not_ be embedded in the generated scanner. The scanner +will not function on its own. The scanner will be dependent upon the +serialized tables. You must load the tables from this file at runtime +before you can scan anything. + + If you do not specify a filename to '--tables-file', the tables will +be saved to 'lex.yy.tables', where 'yy' is the appropriate prefix. + + If your project uses several different scanners, you can concatenate +the serialized tables into one file, and flex will find the correct set +of tables, using the scanner prefix as part of the lookup key. An +example follows: + + $ flex --tables-file --prefix=cpp cpp.l + $ flex --tables-file --prefix=c c.l + $ cat lex.cpp.tables lex.c.tables > all.tables + + The above example created two scanners, 'cpp', and 'c'. Since we did +not specify a filename, the tables were serialized to 'lex.cpp.tables' and +'lex.c.tables', respectively. Then, we concatenated the two files +together into 'all.tables', which we will distribute with our project. +At runtime, we will open the file and tell flex to load the tables from +it. Flex will find the correct tables automatically. (See next +section).
+ + +File: flex.info, Node: Loading and Unloading Serialized Tables, Next: Tables File Format, Prev: Creating Serialized Tables, Up: Serialized Tables + +22.2 Loading and Unloading Serialized Tables +============================================ + +If you've built your scanner with '%option tables-file', then you must +load the scanner tables at runtime. This can be accomplished with the +following function: + + -- Function: int yytables_fload (FILE* FP [, yyscan_t SCANNER]) + Locates scanner tables in the stream pointed to by FP and loads + them. Memory for the tables is allocated via 'yyalloc'. You must + call this function before the first call to 'yylex'. The argument + SCANNER only appears in the reentrant scanner. This function + returns '0' (zero) on success, or non-zero on error. + + The loaded tables are *not* automatically destroyed (unloaded) when +you call 'yylex_destroy'. The reason is that you may create several +scanners of the same type (in a reentrant scanner), each of which needs +access to these tables. To avoid a nasty memory leak, you must call the +following function: + + -- Function: int yytables_destroy ([yyscan_t SCANNER]) + Unloads the scanner tables. The tables must be loaded again before + you can scan any more data. The argument SCANNER only appears in + the reentrant scanner. This function returns '0' (zero) on + success, or non-zero on error. + + *The functions 'yytables_fload' and 'yytables_destroy' are not +thread-safe.* You must ensure that these functions are called exactly +once (for each scanner type) in a threaded program, before any thread +calls 'yylex'. After the tables are loaded, they are never written to, +and no thread protection is required thereafter - until you destroy +them. + + +File: flex.info, Node: Tables File Format, Prev: Loading and Unloading Serialized Tables, Up: Serialized Tables + +22.3 Tables File Format +======================= + +This section defines the file format of serialized 'flex' tables. 
+ + The tables format allows for one or more sets of tables to be +specified, where each set corresponds to a given scanner. Scanners are +indexed by name, as described below. The file format is as follows: + + TABLE SET 1 + +-------------------------------+ + Header | uint32 th_magic; | + | uint32 th_hsize; | + | uint32 th_ssize; | + | uint16 th_flags; | + | char th_version[]; | + | char th_name[]; | + | uint8 th_pad64[]; | + +-------------------------------+ + Table 1 | uint16 td_id; | + | uint16 td_flags; | + | uint32 td_hilen; | + | uint32 td_lolen; | + | void td_data[]; | + | uint8 td_pad64[]; | + +-------------------------------+ + Table 2 | | + . . . + . . . + . . . + . . . + Table n | | + +-------------------------------+ + TABLE SET 2 + . + . + . + TABLE SET N + + The above diagram shows that a complete set of tables consists of a +header followed by multiple individual tables. Furthermore, multiple +complete sets may be present in the same file, each set with its own +header and tables. The sets are contiguous in the file. The only way +to know if another set follows is to check the next four bytes for the +magic number (or check for EOF). The header and tables sections are +padded to 64-bit boundaries. Below we describe each field in detail. +This format does not specify how the scanner will expand the given data, +i.e., data may be serialized as int8, but expanded to an int32 array at +runtime. This is to reduce the size of the serialized data where +possible. Remember, _all integer values are in network byte order_. + +Fields of a table header: + +'th_magic' + Magic number, always 0xF13C57B1. + +'th_hsize' + Size of this entire header, in bytes, including all fields plus any + padding. + +'th_ssize' + Size of this entire set, in bytes, including the header, all + tables, plus any padding. + +'th_flags' + Bit flags for this table set. Currently unused. + +'th_version[]' + Flex version in NULL-terminated string format. e.g., '2.5.13a'. 
+ This is the version of flex that was used to create the serialized + tables. + +'th_name[]' + Contains the name of this table set. The default is 'yytables', + and is prefixed accordingly, e.g., 'footables'. Must be + NULL-terminated. + +'th_pad64[]' + Zero or more NULL bytes, padding the entire header to the next + 64-bit boundary as calculated from the beginning of the header. + +Fields of a table: + +'td_id' + Specifies the table identifier. Possible values are: + 'YYTD_ID_ACCEPT (0x01)' + 'yy_accept' + 'YYTD_ID_BASE (0x02)' + 'yy_base' + 'YYTD_ID_CHK (0x03)' + 'yy_chk' + 'YYTD_ID_DEF (0x04)' + 'yy_def' + 'YYTD_ID_EC (0x05)' + 'yy_ec ' + 'YYTD_ID_META (0x06)' + 'yy_meta' + 'YYTD_ID_NUL_TRANS (0x07)' + 'yy_NUL_trans' + 'YYTD_ID_NXT (0x08)' + 'yy_nxt'. This array may be two dimensional. See the + 'td_hilen' field below. + 'YYTD_ID_RULE_CAN_MATCH_EOL (0x09)' + 'yy_rule_can_match_eol' + 'YYTD_ID_START_STATE_LIST (0x0A)' + 'yy_start_state_list'. This array is handled specially + because it is an array of pointers to structs. See the + 'td_flags' field below. + 'YYTD_ID_TRANSITION (0x0B)' + 'yy_transition'. This array is handled specially because it + is an array of structs. See the 'td_lolen' field below. + 'YYTD_ID_ACCLIST (0x0C)' + 'yy_acclist' + +'td_flags' + Bit flags describing how to interpret the data in 'td_data'. The + data arrays are one-dimensional by default, but may be two + dimensional as specified in the 'td_hilen' field. + + 'YYTD_DATA8 (0x01)' + The data is serialized as an array of type int8. + 'YYTD_DATA16 (0x02)' + The data is serialized as an array of type int16. + 'YYTD_DATA32 (0x04)' + The data is serialized as an array of type int32. + 'YYTD_PTRANS (0x08)' + The data is a list of indexes of entries in the expanded + 'yy_transition' array. Each index should be expanded to a + pointer to the corresponding entry in the 'yy_transition' + array. We count on the fact that the 'yy_transition' array + has already been seen. 
+ 'YYTD_STRUCT (0x10)' + The data is a list of yy_trans_info structs, each of which + consists of two integers. There is no padding between struct + elements or between structs. The type of each member is + determined by the 'YYTD_DATA*' bits. + +'td_hilen' + If 'td_hilen' is non-zero, then the data is a two-dimensional + array. Otherwise, the data is a one-dimensional array. 'td_hilen' + contains the number of elements in the higher dimensional array, + and 'td_lolen' contains the number of elements in the lowest + dimension. + + Conceptually, 'td_data' is either 'sometype td_data[td_lolen]', or + 'sometype td_data[td_hilen][td_lolen]', where 'sometype' is + specified by the 'td_flags' field. It is possible for both + 'td_lolen' and 'td_hilen' to be zero, in which case 'td_data' is a + zero length array, and no data is loaded, i.e., this table is + simply skipped. Flex does not currently generate tables of zero + length. + +'td_lolen' + Specifies the number of elements in the lowest dimension array. If + this is a one-dimensional array, then it is simply the number of + elements in this array. The element size is determined by the + 'td_flags' field. + +'td_data[]' + The table data. This array may be a one- or two-dimensional array, + of type 'int8', 'int16', 'int32', 'struct yy_trans_info', or + 'struct yy_trans_info*', depending upon the values in the + 'td_flags', 'td_hilen', and 'td_lolen' fields. + +'td_pad64[]' + Zero or more NULL bytes, padding the entire table to the next + 64-bit boundary as calculated from the beginning of this table. + + +File: flex.info, Node: Diagnostics, Next: Limitations, Prev: Serialized Tables, Up: Top + +23 Diagnostics +************** + +The following is a list of 'flex' diagnostic messages: + + * 'warning, rule cannot be matched' indicates that the given rule + cannot be matched because it follows other rules that will always + match the same text as it. 
For example, in the following 'foo' + cannot be matched because it comes after an identifier "catch-all" + rule: + + [a-z]+ got_identifier(); + foo got_foo(); + + Using 'REJECT' in a scanner suppresses this warning. + + * 'warning, -s option given but default rule can be matched' means + that it is possible (perhaps only in a particular start condition) + that the default rule (match any single character) is the only one + that will match a particular input. Since '-s' was given, + presumably this is not intended. + + * 'reject_used_but_not_detected undefined' or + 'yymore_used_but_not_detected undefined'. These errors can occur + at compile time. They indicate that the scanner uses 'REJECT' or + 'yymore()' but that 'flex' failed to notice the fact, meaning that + 'flex' scanned the first two sections looking for occurrences of + these actions and failed to find any, but somehow you snuck some in + (via a #include file, for example). Use '%option reject' or + '%option yymore' to indicate to 'flex' that you really do use these + features. + + * 'flex scanner jammed'. A scanner compiled with '-s' has + encountered an input string which wasn't matched by any of its + rules. This error can also occur due to internal problems. + + * 'token too large, exceeds YYLMAX'. Your scanner uses '%array' and + one of its rules matched a string longer than the 'YYLMAX' constant + (8K bytes by default). You can increase the value by #define'ing + 'YYLMAX' in the definitions section of your 'flex' input. + + * 'scanner requires -8 flag to use the character 'x''. Your scanner + specification includes recognizing the 8-bit character ''x'' and + you did not specify the -8 flag, and your scanner defaulted to + 7-bit because you used the '-Cf' or '-CF' table compression + options. See the discussion of the '-7' flag, *note Scanner + Options::, for details. + + * 'flex scanner push-back overflow'.
You used 'unput()' to push back + so much text that the scanner's buffer could not hold both the + pushed-back text and the current token in 'yytext'. Ideally the + scanner should dynamically resize the buffer in this case, but at + present it does not. + + * 'input buffer overflow, can't enlarge buffer because scanner uses + REJECT'. The scanner was working on matching an extremely large + token and needed to expand the input buffer. This doesn't work + with scanners that use 'REJECT'. + + * 'fatal flex scanner internal error--end of buffer missed'. This + can occur in a scanner which is reentered after a long-jump has + jumped out (or over) the scanner's activation frame. Before + reentering the scanner, use: + yyrestart( yyin ); + or, as noted above, switch to using the C++ scanner class. + + * 'too many start conditions in <> construct!' You listed more start + conditions in a <> construct than exist (so you must have listed at + least one of them twice). + + +File: flex.info, Node: Limitations, Next: Bibliography, Prev: Diagnostics, Up: Top + +24 Limitations +************** + +Some trailing context patterns cannot be properly matched and generate +warning messages ('dangerous trailing context'). These are patterns +where the ending of the first part of the rule matches the beginning of +the second part, such as 'zx*/xy*', where the 'x*' matches the 'x' at +the beginning of the trailing context. (Note that the POSIX draft +states that the text matched by such patterns is undefined.) For some +trailing context rules, parts which are actually fixed-length are not +recognized as such, leading to the abovementioned performance loss. In +particular, parts using '|' or '{n}' (such as 'foo{3}') are always +considered variable-length. Combining trailing context with the special +'|' action can result in _fixed_ trailing context being turned into the +more expensive _variable_ trailing context. 
For example, in the +following: + + %% + abc | + xyz/def + + Use of 'unput()' invalidates yytext and yyleng, unless the '%array' +directive or the '-l' option has been used. Pattern-matching of 'NUL's +is substantially slower than matching other characters. Dynamic +resizing of the input buffer is slow, as it entails rescanning all the +text matched so far by the current (generally huge) token. Due to both +buffering of input and read-ahead, you cannot intermix calls to +'<stdio.h>' routines, such as, getchar(), with 'flex' rules and expect +it to work. Call 'input()' instead. The total table entries listed by +the '-v' flag excludes the number of table entries needed to determine +what rule has been matched. The number of entries is equal to the +number of DFA states if the scanner does not use 'REJECT', and somewhat +greater than the number of states if it does. 'REJECT' cannot be used +with the '-f' or '-F' options. + + The 'flex' internal algorithms need documentation. + + +File: flex.info, Node: Bibliography, Next: FAQ, Prev: Limitations, Up: Top + +25 Additional Reading +********************* + +You may wish to read more about the following programs: + * lex + * yacc + * sed + * awk + + The following books may contain material of interest: + + John Levine, Tony Mason, and Doug Brown, _Lex & Yacc_, O'Reilly and +Associates. Be sure to get the 2nd edition. + + M. E. Lesk and E. Schmidt, _LEX - Lexical Analyzer Generator_ + + Alfred Aho, Ravi Sethi and Jeffrey Ullman, _Compilers: Principles, +Techniques and Tools_, Addison-Wesley (1986). Describes the +pattern-matching techniques used by 'flex' (deterministic finite +automata). + + +File: flex.info, Node: FAQ, Next: Appendices, Prev: Bibliography, Up: Top + +FAQ +*** + +From time to time, the 'flex' maintainer receives certain questions. +Rather than repeat answers to well-understood problems, we publish them +here. 
+ +* Menu: + +* When was flex born?:: +* How do I expand backslash-escape sequences in C-style quoted strings?:: +* Why do flex scanners call fileno if it is not ANSI compatible?:: +* Does flex support recursive pattern definitions?:: +* How do I skip huge chunks of input (tens of megabytes) while using flex?:: +* Flex is not matching my patterns in the same order that I defined them.:: +* My actions are executing out of order or sometimes not at all.:: +* How can I have multiple input sources feed into the same scanner at the same time?:: +* Can I build nested parsers that work with the same input file?:: +* How can I match text only at the end of a file?:: +* How can I make REJECT cascade across start condition boundaries?:: +* Why cant I use fast or full tables with interactive mode?:: +* How much faster is -F or -f than -C?:: +* If I have a simple grammar cant I just parse it with flex?:: +* Why doesn't yyrestart() set the start state back to INITIAL?:: +* How can I match C-style comments?:: +* The period isn't working the way I expected.:: +* Can I get the flex manual in another format?:: +* Does there exist a "faster" NDFA->DFA algorithm?:: +* How does flex compile the DFA so quickly?:: +* How can I use more than 8192 rules?:: +* How do I abandon a file in the middle of a scan and switch to a new file?:: +* How do I execute code only during initialization (only before the first scan)?:: +* How do I execute code at termination?:: +* Where else can I find help?:: +* Can I include comments in the "rules" section of the file?:: +* I get an error about undefined yywrap().:: +* How can I change the matching pattern at run time?:: +* How can I expand macros in the input?:: +* How can I build a two-pass scanner?:: +* How do I match any string not matched in the preceding rules?:: +* I am trying to port code from AT&T lex that uses yysptr and yysbuf.:: +* Is there a way to make flex treat NULL like a regular character?:: +* Whenever flex can not match the input it 
says "flex scanner jammed".:: +* Why doesn't flex have non-greedy operators like perl does?:: +* Memory leak - 16386 bytes allocated by malloc.:: +* How do I track the byte offset for lseek()?:: +* How do I use my own I/O classes in a C++ scanner?:: +* How do I skip as many chars as possible?:: +* deleteme00:: +* Are certain equivalent patterns faster than others?:: +* Is backing up a big deal?:: +* Can I fake multi-byte character support?:: +* deleteme01:: +* Can you discuss some flex internals?:: +* unput() messes up yy_at_bol:: +* The | operator is not doing what I want:: +* Why can't flex understand this variable trailing context pattern?:: +* The ^ operator isn't working:: +* Trailing context is getting confused with trailing optional patterns:: +* Is flex GNU or not?:: +* ERASEME53:: +* I need to scan if-then-else blocks and while loops:: +* ERASEME55:: +* ERASEME56:: +* ERASEME57:: +* Is there a repository for flex scanners?:: +* How can I conditionally compile or preprocess my flex input file?:: +* Where can I find grammars for lex and yacc?:: +* I get an end-of-buffer message for each character scanned.:: +* unnamed-faq-62:: +* unnamed-faq-63:: +* unnamed-faq-64:: +* unnamed-faq-65:: +* unnamed-faq-66:: +* unnamed-faq-67:: +* unnamed-faq-68:: +* unnamed-faq-69:: +* unnamed-faq-70:: +* unnamed-faq-71:: +* unnamed-faq-72:: +* unnamed-faq-73:: +* unnamed-faq-74:: +* unnamed-faq-75:: +* unnamed-faq-76:: +* unnamed-faq-77:: +* unnamed-faq-78:: +* unnamed-faq-79:: +* unnamed-faq-80:: +* unnamed-faq-81:: +* unnamed-faq-82:: +* unnamed-faq-83:: +* unnamed-faq-84:: +* unnamed-faq-85:: +* unnamed-faq-86:: +* unnamed-faq-87:: +* unnamed-faq-88:: +* unnamed-faq-90:: +* unnamed-faq-91:: +* unnamed-faq-92:: +* unnamed-faq-93:: +* unnamed-faq-94:: +* unnamed-faq-95:: +* unnamed-faq-96:: +* unnamed-faq-97:: +* unnamed-faq-98:: +* unnamed-faq-99:: +* unnamed-faq-100:: +* unnamed-faq-101:: +* What is the difference between YYLEX_PARAM and YY_DECL?:: +* Why do I get 
"conflicting types for yylex" error?:: +* How do I access the values set in a Flex action from within a Bison action?:: + + +File: flex.info, Node: When was flex born?, Next: How do I expand backslash-escape sequences in C-style quoted strings?, Up: FAQ + +When was flex born? +=================== + +Vern Paxson took over the 'Software Tools' lex project from Jef +Poskanzer in 1982. At that point it was written in Ratfor. Around 1987 +or so, Paxson translated it into C, and a legend was born :-). + + +File: flex.info, Node: How do I expand backslash-escape sequences in C-style quoted strings?, Next: Why do flex scanners call fileno if it is not ANSI compatible?, Prev: When was flex born?, Up: FAQ + +How do I expand backslash-escape sequences in C-style quoted strings? +===================================================================== + +A key point when scanning quoted strings is that you cannot (easily) +write a single rule that will precisely match the string if you allow +things like embedded escape sequences and newlines. If you try to match +strings with a single rule then you'll wind up having to rescan the +string anyway to find any escape sequences. + + Instead you can use exclusive start conditions and a set of rules, +one for matching non-escaped text, one for matching a single escape, one +for matching an embedded newline, and one for recognizing the end of the +string. Each of these rules is then faced with the question of where to +put its intermediary results. The best solution is for the rules to +append their local value of 'yytext' to the end of a "string literal" +buffer. A rule like the escape-matcher will append to the buffer the +meaning of the escape sequence rather than the literal text in 'yytext'. +In this way, 'yytext' does not need to be modified at all. 
+ + +File: flex.info, Node: Why do flex scanners call fileno if it is not ANSI compatible?, Next: Does flex support recursive pattern definitions?, Prev: How do I expand backslash-escape sequences in C-style quoted strings?, Up: FAQ + +Why do flex scanners call fileno if it is not ANSI compatible? +============================================================== + +Flex scanners call 'fileno()' in order to get the file descriptor +corresponding to 'yyin'. The file descriptor may be passed to +'isatty()' or 'read()', depending upon which '%options' you specified. +If your system does not have 'fileno()' support, to get rid of the +'read()' call, do not specify '%option read'. To get rid of the +'isatty()' call, you must specify one of '%option always-interactive' or +'%option never-interactive'. + + +File: flex.info, Node: Does flex support recursive pattern definitions?, Next: How do I skip huge chunks of input (tens of megabytes) while using flex?, Prev: Why do flex scanners call fileno if it is not ANSI compatible?, Up: FAQ + +Does flex support recursive pattern definitions? +================================================ + +e.g., + + %% + block "{"({block}|{statement})*"}" + + No. You cannot have recursive definitions. The pattern-matching +power of regular expressions in general (and therefore flex scanners, +too) is limited. In particular, regular expressions cannot "balance" +parentheses to an arbitrary degree. For example, it's impossible to +write a regular expression that matches all strings containing the same +number of '{'s as '}'s. For more powerful pattern matching, you need a +parser, such as 'GNU bison'. + + +File: flex.info, Node: How do I skip huge chunks of input (tens of megabytes) while using flex?, Next: Flex is not matching my patterns in the same order that I defined them., Prev: Does flex support recursive pattern definitions?, Up: FAQ + +How do I skip huge chunks of input (tens of megabytes) while using flex? 
+======================================================================== + +Use 'fseek()' (or 'lseek()') to position yyin, then call 'yyrestart()'. + + +File: flex.info, Node: Flex is not matching my patterns in the same order that I defined them., Next: My actions are executing out of order or sometimes not at all., Prev: How do I skip huge chunks of input (tens of megabytes) while using flex?, Up: FAQ + +Flex is not matching my patterns in the same order that I defined them. +======================================================================= + +'flex' picks the rule that matches the most text (i.e., the longest +possible input string). This is because 'flex' uses an entirely +different matching technique ("deterministic finite automata") that +actually does all of the matching simultaneously, in parallel. (Seems +impossible, but it's actually a fairly simple technique once you +understand the principles.) + + A side-effect of this parallel matching is that when the input +matches more than one rule, 'flex' scanners pick the rule that matched +the _most_ text. This is explained further in the manual, in the +section *Note Matching::. + + If you want 'flex' to choose a shorter match, then you can work +around this behavior by expanding your short rule to match more text, +then put back the extra: + + data_.* yyless( 5 ); BEGIN BLOCKIDSTATE; + + Another fix would be to make the second rule active only during the +'<BLOCKIDSTATE>' start condition, and make that start condition +exclusive by declaring it with '%x' instead of '%s'. + + A final fix is to change the input language so that the ambiguity for +'data_' is removed, by adding characters to it that don't match the +identifier rule, or by removing characters (such as '_') from the +identifier rule so it no longer matches 'data_'. (Of course, you might +also not have the option of changing the input language.) 
+ + +File: flex.info, Node: My actions are executing out of order or sometimes not at all., Next: How can I have multiple input sources feed into the same scanner at the same time?, Prev: Flex is not matching my patterns in the same order that I defined them., Up: FAQ + +My actions are executing out of order or sometimes not at all. +============================================================== + +Most likely, you have (in error) placed the opening '{' of the action +block on a different line than the rule, e.g., + + ^(foo|bar) + { <<<--- WRONG! + + } + + 'flex' requires that the opening '{' of an action associated with a +rule begin on the same line as does the rule. You need instead to write +your rules as follows: + + ^(foo|bar) { // CORRECT! + + } + + +File: flex.info, Node: How can I have multiple input sources feed into the same scanner at the same time?, Next: Can I build nested parsers that work with the same input file?, Prev: My actions are executing out of order or sometimes not at all., Up: FAQ + +How can I have multiple input sources feed into the same scanner at the same time? +================================================================================== + +If ... + * your scanner is free of backtracking (verified using 'flex''s '-b' + flag), + * AND you run your scanner interactively ('-I' option; default unless + using special table compression options), + * AND you feed it one character at a time by redefining 'YY_INPUT' to + do so, + + then every time it matches a token, it will have exhausted its input +buffer (because the scanner is free of backtracking). This means you +can safely use 'select()' at the point and only call 'yylex()' for +another token if 'select()' indicates there's data available. + + That is, move the 'select()' out from the input function to a point +where it determines whether 'yylex()' gets called for the next token. 
+ + With this approach, you will still have problems if your input can +arrive piecemeal; 'select()' could inform you that the beginning of a +token is available, you call 'yylex()' to get it, but it winds up +blocking waiting for the later characters in the token. + + Here's another way: Move your input multiplexing inside of +'YY_INPUT'. That is, whenever 'YY_INPUT' is called, it 'select()''s to +see where input is available. If input is available for the scanner, it +reads and returns the next byte. If input is available from another +source, it calls whatever function is responsible for reading from that +source. (If no input is available, it blocks until some input is +available.) I've used this technique in an interpreter I wrote that +both reads keyboard input using a 'flex' scanner and IPC traffic from +sockets, and it works fine. + + +File: flex.info, Node: Can I build nested parsers that work with the same input file?, Next: How can I match text only at the end of a file?, Prev: How can I have multiple input sources feed into the same scanner at the same time?, Up: FAQ + +Can I build nested parsers that work with the same input file? +============================================================== + +This is not going to work without some additional effort. The reason is +that 'flex' block-buffers the input it reads from 'yyin'. This means +that the "outermost" 'yylex()', when called, will automatically slurp up +the first 8K of input available on yyin, and subsequent calls to other +'yylex()''s won't see that input. You might be tempted to work around +this problem by redefining 'YY_INPUT' to only return a small amount of +text, but it turns out that that approach is quite difficult. Instead, +the best solution is to combine all of your scanners into one large +scanner, using a different exclusive start condition for each. 
+ + +File: flex.info, Node: How can I match text only at the end of a file?, Next: How can I make REJECT cascade across start condition boundaries?, Prev: Can I build nested parsers that work with the same input file?, Up: FAQ + +How can I match text only at the end of a file? +=============================================== + +There is no way to write a rule which is "match this text, but only if +it comes at the end of the file". You can fake it, though, if you +happen to have a character lying around that you don't allow in your +input. Then you redefine 'YY_INPUT' to call your own routine which, if +it sees an 'EOF', returns the magic character first (and remembers to +return a real 'EOF' next time it's called). Then you could write: + + <COMMENT>(.|\n)*{EOF_CHAR} /* saw comment at EOF */ + + +File: flex.info, Node: How can I make REJECT cascade across start condition boundaries?, Next: Why cant I use fast or full tables with interactive mode?, Prev: How can I match text only at the end of a file?, Up: FAQ + +How can I make REJECT cascade across start condition boundaries? +================================================================ + +You can do this as follows. Suppose you have a start condition 'A', and +after exhausting all of the possible matches in '<A>', you want to try +matches in '<INITIAL>'. Then you could use the following: + + %x A + %% + <A>rule_that_is_long ...; REJECT; + <A>rule ...; REJECT; /* shorter rule */ + <A>etc. + ... + <A>.|\n { + /* Shortest and last rule in <A>, so + * cascaded REJECTs will eventually + * wind up matching this rule. We want + * to now switch to the initial state + * and try matching from there instead. 
+ */ + yyless(0); /* put back matched text */ + BEGIN(INITIAL); + } + + +File: flex.info, Node: Why cant I use fast or full tables with interactive mode?, Next: How much faster is -F or -f than -C?, Prev: How can I make REJECT cascade across start condition boundaries?, Up: FAQ + +Why can't I use fast or full tables with interactive mode? +========================================================== + +One of the assumptions flex makes is that interactive applications are +inherently slow (they're waiting on a human after all). It has to do +with how the scanner detects that it must be finished scanning a token. +For interactive scanners, after scanning each character the current +state is looked up in a table (essentially) to see whether there's a +chance of another input character possibly extending the length of the +match. If not, the scanner halts. For non-interactive scanners, the +end-of-token test is much simpler, basically a compare with 0, so no +memory bus cycles. Since the test occurs in the innermost scanning +loop, one would like to make it go as fast as possible. + + Still, it seems reasonable to allow the user to choose to trade off a +bit of performance in this area to gain the corresponding flexibility. +There might be another reason, though, why fast scanners don't support +the interactive option. + + +File: flex.info, Node: How much faster is -F or -f than -C?, Next: If I have a simple grammar cant I just parse it with flex?, Prev: Why cant I use fast or full tables with interactive mode?, Up: FAQ + +How much faster is -F or -f than -C? +==================================== + +Much faster (factor of 2-3). + + +File: flex.info, Node: If I have a simple grammar cant I just parse it with flex?, Next: Why doesn't yyrestart() set the start state back to INITIAL?, Prev: How much faster is -F or -f than -C?, Up: FAQ + +If I have a simple grammar can't I just parse it with flex? 
+=========================================================== + +Is your grammar recursive? That's almost always a sign that you're +better off using a parser/scanner rather than just trying to use a +scanner alone. + + +File: flex.info, Node: Why doesn't yyrestart() set the start state back to INITIAL?, Next: How can I match C-style comments?, Prev: If I have a simple grammar cant I just parse it with flex?, Up: FAQ + +Why doesn't yyrestart() set the start state back to INITIAL? +============================================================ + +There are two reasons. The first is that there might be programs that +rely on the start state not changing across file changes. The second is +that beginning with 'flex' version 2.4, use of 'yyrestart()' is no +longer required, so fixing the problem there doesn't solve the more +general problem. + + +File: flex.info, Node: How can I match C-style comments?, Next: The period isn't working the way I expected., Prev: Why doesn't yyrestart() set the start state back to INITIAL?, Up: FAQ + +How can I match C-style comments? +================================= + +You might be tempted to try something like this: + + "/*".*"*/" // WRONG! + + or, worse, this: + + "/*"(.|\n)"*/" // WRONG! + + The above rules will eat too much input, and blow up on things like: + + /* a comment */ do_my_thing( "oops */" ); + + Here is one way which allows you to track line information: + + <INITIAL>{ + "/*" BEGIN(IN_COMMENT); + } + <IN_COMMENT>{ + "*/" BEGIN(INITIAL); + [^*\n]+ // eat comment in chunks + "*" // eat the lone star + \n yylineno++; + } + + +File: flex.info, Node: The period isn't working the way I expected., Next: Can I get the flex manual in another format?, Prev: How can I match C-style comments?, Up: FAQ + +The '.' isn't working the way I expected. 
+========================================= + +Here are some tips for using '.': + + * A common mistake is to place the grouping parenthesis AFTER an + operator, when you really meant to place the parenthesis BEFORE the + operator, e.g., you probably want this '(foo|bar)+' and NOT this + '(foo|bar+)'. + + The first pattern matches the words 'foo' or 'bar' any number of + times, e.g., it matches the text 'barfoofoobarfoo'. The second + pattern matches a single instance of 'foo' or a single instance of + 'bar' followed by one or more 'r's, e.g., it matches the text + 'barrrr'. + * A '.' inside '[]''s just means a literal '.' (period), and NOT "any + character except newline". + * Remember that '.' matches any character EXCEPT '\n' (and 'EOF'). + If you really want to match ANY character, including newlines, then + use '(.|\n)'. Beware that the regex '(.|\n)+' will match your entire + input! + * Finally, if you want to match a literal '.' (a period), then use + '[.]' or '"."'. + + +File: flex.info, Node: Can I get the flex manual in another format?, Next: Does there exist a "faster" NDFA->DFA algorithm?, Prev: The period isn't working the way I expected., Up: FAQ + +Can I get the flex manual in another format? +============================================ + +The 'flex' source distribution includes a texinfo manual. You are free +to convert that texinfo into whatever format you desire. The 'texinfo' +package includes tools for conversion to a number of formats. + + +File: flex.info, Node: Does there exist a "faster" NDFA->DFA algorithm?, Next: How does flex compile the DFA so quickly?, Prev: Can I get the flex manual in another format?, Up: FAQ + +Does there exist a "faster" NDFA->DFA algorithm? +================================================ + +There's no way around the potential exponential running time - it can +take you exponential time just to enumerate all of the DFA states. In +practice, though, the running time is closer to linear, or sometimes +quadratic. 
+ + +File: flex.info, Node: How does flex compile the DFA so quickly?, Next: How can I use more than 8192 rules?, Prev: Does there exist a "faster" NDFA->DFA algorithm?, Up: FAQ + +How does flex compile the DFA so quickly? +========================================= + +There are two big speed wins that 'flex' uses: + + 1. It analyzes the input rules to construct equivalence classes for + those characters that always make the same transitions. It then + rewrites the NFA using equivalence classes for transitions instead + of characters. This cuts down the NFA->DFA computation time + dramatically, to the point where, for uncompressed DFA tables, the + DFA generation is often I/O bound in writing out the tables. + 2. It maintains hash values for previously computed DFA states, so + testing whether a newly constructed DFA state is equivalent to a + previously constructed state can be done very quickly, by first + comparing hash values. + + +File: flex.info, Node: How can I use more than 8192 rules?, Next: How do I abandon a file in the middle of a scan and switch to a new file?, Prev: How does flex compile the DFA so quickly?, Up: FAQ + +How can I use more than 8192 rules? +=================================== + +'Flex' is compiled with an upper limit of 8192 rules per scanner. If +you need more than 8192 rules in your scanner, you'll have to recompile +'flex' with the following changes in 'flexdef.h': + + < #define YY_TRAILING_MASK 0x2000 + < #define YY_TRAILING_HEAD_MASK 0x4000 + -- + > #define YY_TRAILING_MASK 0x20000000 + > #define YY_TRAILING_HEAD_MASK 0x40000000 + + This should work okay as long as your C compiler uses 32 bit +integers. But you might want to think about whether using such a huge +number of rules is the best way to solve your problem. 
+ + The following may also be relevant: + + With luck, you should be able to increase the definitions in +flexdef.h for: + + #define JAMSTATE -32766 /* marks a reference to the state that always jams */ + #define MAXIMUM_MNS 31999 + #define BAD_SUBSCRIPT -32767 + + recompile everything, and it'll all work. Flex only has these +16-bit-like values built into it because a long time ago it was +developed on a machine with 16-bit ints. I've given this advice to +others in the past but haven't heard back from them whether it worked +okay or not... + + +File: flex.info, Node: How do I abandon a file in the middle of a scan and switch to a new file?, Next: How do I execute code only during initialization (only before the first scan)?, Prev: How can I use more than 8192 rules?, Up: FAQ + +How do I abandon a file in the middle of a scan and switch to a new file? +========================================================================= + +Just call 'yyrestart(newfile)'. Be sure to reset the start state if you +want a "fresh start", since 'yyrestart' does NOT reset the start state +back to 'INITIAL'. + + +File: flex.info, Node: How do I execute code only during initialization (only before the first scan)?, Next: How do I execute code at termination?, Prev: How do I abandon a file in the middle of a scan and switch to a new file?, Up: FAQ + +How do I execute code only during initialization (only before the first scan)? +============================================================================== + +You can specify an initial action by defining the macro 'YY_USER_INIT' +(though note that 'yyout' may not be available at the time this macro is +executed). Or you can add to the beginning of your rules section: + + %% + /* Must be indented! */ + static int did_init = 0; + + if ( ! 
did_init ){ + do_my_init(); + did_init = 1; + } + + +File: flex.info, Node: How do I execute code at termination?, Next: Where else can I find help?, Prev: How do I execute code only during initialization (only before the first scan)?, Up: FAQ + +How do I execute code at termination? +===================================== + +You can specify an action for the '<<EOF>>' rule. + + +File: flex.info, Node: Where else can I find help?, Next: Can I include comments in the "rules" section of the file?, Prev: How do I execute code at termination?, Up: FAQ + +Where else can I find help? +=========================== + +You can find the flex homepage on the web at +<http://flex.sourceforge.net/>. See that page for details about flex +mailing lists as well. + + +File: flex.info, Node: Can I include comments in the "rules" section of the file?, Next: I get an error about undefined yywrap()., Prev: Where else can I find help?, Up: FAQ + +Can I include comments in the "rules" section of the file? +========================================================== + +Yes, just about anywhere you want to. See the manual for the specific +syntax. + + +File: flex.info, Node: I get an error about undefined yywrap()., Next: How can I change the matching pattern at run time?, Prev: Can I include comments in the "rules" section of the file?, Up: FAQ + +I get an error about undefined yywrap(). +======================================== + +You must supply a 'yywrap()' function of your own, or link to 'libfl.a' +(which provides one), or use + + %option noyywrap + + in your source to say you don't want a 'yywrap()' function. + + +File: flex.info, Node: How can I change the matching pattern at run time?, Next: How can I expand macros in the input?, Prev: I get an error about undefined yywrap()., Up: FAQ + +How can I change the matching pattern at run time? +================================================== + +You can't, it's compiled into a static table when flex builds the +scanner. 
+ + +File: flex.info, Node: How can I expand macros in the input?, Next: How can I build a two-pass scanner?, Prev: How can I change the matching pattern at run time?, Up: FAQ + +How can I expand macros in the input? +===================================== + +The best way to approach this problem is at a higher level, e.g., in the +parser. + + However, you can do this using multiple input buffers. + + %% + macro/[a-z]+ { + /* Saw the macro "macro" followed by extra stuff. */ + main_buffer = YY_CURRENT_BUFFER; + expansion_buffer = yy_scan_string(expand(yytext)); + yy_switch_to_buffer(expansion_buffer); + } + + <<EOF>> { + if ( expansion_buffer ) + { + // We were doing an expansion, return to where + // we were. + yy_switch_to_buffer(main_buffer); + yy_delete_buffer(expansion_buffer); + expansion_buffer = 0; + } + else + yyterminate(); + } + + You probably will want a stack of expansion buffers to allow nested +macros. From the above though hopefully the idea is clear. + + +File: flex.info, Node: How can I build a two-pass scanner?, Next: How do I match any string not matched in the preceding rules?, Prev: How can I expand macros in the input?, Up: FAQ + +How can I build a two-pass scanner? +=================================== + +One way to do it is to filter the first pass to a temporary file, then +process the temporary file on the second pass. You will probably see a +performance hit, due to all the disk I/O. + + When you need to look ahead far forward like this, it almost always +means that the right solution is to build a parse tree of the entire +input, then walk it after the parse in order to generate the output. In +a sense, this is a two-pass approach, once through the text and once +through the parse tree, but the performance hit for the latter is +usually an order of magnitude smaller, since everything is already +classified, in binary format, and residing in memory. 
+ + +File: flex.info, Node: How do I match any string not matched in the preceding rules?, Next: I am trying to port code from AT&T lex that uses yysptr and yysbuf., Prev: How can I build a two-pass scanner?, Up: FAQ + +How do I match any string not matched in the preceding rules? +============================================================= + +One way to assign precedence, is to place the more specific rules first. +If two rules would match the same input (same sequence of characters) +then the first rule listed in the 'flex' input wins, e.g., + + %% + foo[a-zA-Z_]+ return FOO_ID; + bar[a-zA-Z_]+ return BAR_ID; + [a-zA-Z_]+ return GENERIC_ID; + + Note that the rule '[a-zA-Z_]+' must come *after* the others. It +will match the same amount of text as the more specific rules, and in +that case the 'flex' scanner will pick the first rule listed in your +scanner as the one to match. + + +File: flex.info, Node: I am trying to port code from AT&T lex that uses yysptr and yysbuf., Next: Is there a way to make flex treat NULL like a regular character?, Prev: How do I match any string not matched in the preceding rules?, Up: FAQ + +I am trying to port code from AT&T lex that uses yysptr and yysbuf. +=================================================================== + +Those are internal variables pointing into the AT&T scanner's input +buffer. I imagine they're being manipulated in user versions of the +'input()' and 'unput()' functions. If so, what you need to do is +analyze those functions to figure out what they're doing, and then +replace 'input()' with an appropriate definition of 'YY_INPUT'. You +shouldn't need to (and must not) replace 'flex''s 'unput()' function. 
+ + +File: flex.info, Node: Is there a way to make flex treat NULL like a regular character?, Next: Whenever flex can not match the input it says "flex scanner jammed"., Prev: I am trying to port code from AT&T lex that uses yysptr and yysbuf., Up: FAQ + +Is there a way to make flex treat NULL like a regular character? +================================================================ + +Yes, '\0' and '\x00' should both do the trick. Perhaps you have an +ancient version of 'flex'. The latest release is version 2.6.4. + + +File: flex.info, Node: Whenever flex can not match the input it says "flex scanner jammed"., Next: Why doesn't flex have non-greedy operators like perl does?, Prev: Is there a way to make flex treat NULL like a regular character?, Up: FAQ + +Whenever flex can not match the input it says "flex scanner jammed". +==================================================================== + +You need to add a rule that matches the otherwise-unmatched text, e.g., + + %option yylineno + %% + [[a bunch of rules here]] + + . printf("bad input character '%s' at line %d\n", yytext, yylineno); + + See '%option default' for more information. + + +File: flex.info, Node: Why doesn't flex have non-greedy operators like perl does?, Next: Memory leak - 16386 bytes allocated by malloc., Prev: Whenever flex can not match the input it says "flex scanner jammed"., Up: FAQ + +Why doesn't flex have non-greedy operators like perl does? +========================================================== + +A DFA can do a non-greedy match by stopping the first time it enters an +accepting state, instead of consuming input until it determines that no +further matching is possible (a "jam" state). This is actually easier +to implement than longest leftmost match (which flex does). + + But it's also much less useful than longest leftmost match. In +general, when you find yourself wishing for non-greedy matching, that's +usually a sign that you're trying to make the scanner do some parsing. 
+That's generally the wrong approach, since it lacks the power to do a +decent job. Better is to either introduce a separate parser, or to +split the scanner into multiple scanners using (exclusive) start +conditions. + + You might have a separate start state once you've seen the 'BEGIN'. +In that state, you might then have a regex that will match 'END' (to +kick you out of the state), and perhaps '(.|\n)' to get a single +character within the chunk ... + + This approach also has much better error-reporting properties. + + +File: flex.info, Node: Memory leak - 16386 bytes allocated by malloc., Next: How do I track the byte offset for lseek()?, Prev: Why doesn't flex have non-greedy operators like perl does?, Up: FAQ + +Memory leak - 16386 bytes allocated by malloc. +============================================== + +UPDATED 2002-07-10: As of 'flex' version 2.5.9, this leak means that you +did not call 'yylex_destroy()'. If you are using an earlier version of +'flex', then read on. + + The leak is about 16426 bytes. That is, (8192 * 2 + 2) for the +read-buffer, and about 40 for 'struct yy_buffer_state' (depending upon +alignment). The leak is in the non-reentrant C scanner only (NOT in the +reentrant scanner, NOT in the C++ scanner). Since 'flex' doesn't know +when you are done, the buffer is never freed. + + However, the leak won't multiply since the buffer is reused no matter +how many times you call 'yylex()'. + + If you want to reclaim the memory when you are completely done +scanning, then you might try this: + + /* For non-reentrant C scanner only. */ + yy_delete_buffer(YY_CURRENT_BUFFER); + yy_init = 1; + + Note: 'yy_init' is an "internal variable", and hasn't been tested in +this situation. It is possible that some other globals may need +resetting as well. 
+ + +File: flex.info, Node: How do I track the byte offset for lseek()?, Next: How do I use my own I/O classes in a C++ scanner?, Prev: Memory leak - 16386 bytes allocated by malloc., Up: FAQ + +How do I track the byte offset for lseek()? +=========================================== + + > We thought that it would be possible to have this number through the + > evaluation of the following expression: + > + > seek_position = (no_buffers)*YY_READ_BUF_SIZE + yy_c_buf_p - YY_CURRENT_BUFFER->yy_ch_buf + + While this is the right idea, it has two problems. The first is that +it's possible that 'flex' will request less than 'YY_READ_BUF_SIZE' +during an invocation of 'YY_INPUT' (or that your input source will +return less even though 'YY_READ_BUF_SIZE' bytes were requested). The +second problem is that when refilling its internal buffer, 'flex' keeps +some characters from the previous buffer (because usually it's in the +middle of a match, and needs those characters to construct 'yytext' for +the match once it's done). Because of this, 'yy_c_buf_p - +YY_CURRENT_BUFFER->yy_ch_buf' won't be exactly the number of characters +already read from the current buffer. + + An alternative solution is to count the number of characters you've +matched since starting to scan. This can be done by using +'YY_USER_ACTION'. For example, + + #define YY_USER_ACTION num_chars += yyleng; + + (You need to be careful to update your bookkeeping if you use +'yymore()', 'yyless()', 'unput()', or 'input()'.) + + +File: flex.info, Node: How do I use my own I/O classes in a C++ scanner?, Next: How do I skip as many chars as possible?, Prev: How do I track the byte offset for lseek()?, Up: FAQ + +How do I use my own I/O classes in a C++ scanner? +================================================= + +When the flex C++ scanning class rewrite finally happens, then this sort +of thing should become much easier.
+ + You can do this by passing the various functions (such as +'LexerInput()' and 'LexerOutput()') NULL 'iostream*''s, and then dealing +with your own I/O classes surreptitiously (i.e., stashing them in +special member variables). This works because the only assumption about +the lexer regarding what's done with the iostream's is that they're +ultimately passed to 'LexerInput()' and 'LexerOutput', which then do +whatever is necessary with them. + + +File: flex.info, Node: How do I skip as many chars as possible?, Next: deleteme00, Prev: How do I use my own I/O classes in a C++ scanner?, Up: FAQ + +How do I skip as many chars as possible? +======================================== + +How do I skip as many chars as possible - without interfering with the +other patterns? + + In the example below, we want to skip over characters until we see +the phrase "endskip". The following will _NOT_ work correctly (do you +see why not?) + + /* INCORRECT SCANNER */ + %x SKIP + %% + <INITIAL>startskip BEGIN(SKIP); + ... + <SKIP>"endskip" BEGIN(INITIAL); + <SKIP>.* ; + + The problem is that the pattern .* will eat up the word "endskip." +The simplest (but slow) fix is: + + <SKIP>"endskip" BEGIN(INITIAL); + <SKIP>. ; + + The fix involves making the second rule match more, without making it +match "endskip" plus something else. So for example: + + <SKIP>"endskip" BEGIN(INITIAL); + <SKIP>[^e]+ ; + <SKIP>. ;/* so you eat up e's, too */ + + +File: flex.info, Node: deleteme00, Next: Are certain equivalent patterns faster than others?, Prev: How do I skip as many chars as possible?, Up: FAQ + +deleteme00 +========== + + QUESTION: + When was flex born? + + Vern Paxson took over + the Software Tools lex project from Jef Poskanzer in 1982. At that point it + was written in Ratfor. Around 1987 or so, Paxson translated it into C, and + a legend was born :-). 
+ + +File: flex.info, Node: Are certain equivalent patterns faster than others?, Next: Is backing up a big deal?, Prev: deleteme00, Up: FAQ + +Are certain equivalent patterns faster than others? +=================================================== + + To: Adoram Rogel <adoram@orna.hybridge.com> + Subject: Re: Flex 2.5.2 performance questions + In-reply-to: Your message of Wed, 18 Sep 96 11:12:17 EDT. + Date: Wed, 18 Sep 96 10:51:02 PDT + From: Vern Paxson <vern> + + [Note, the most recent flex release is 2.5.4, which you can get from + ftp.ee.lbl.gov. It has bug fixes over 2.5.2 and 2.5.3.] + + > 1. Using the pattern + > ([Ff](oot)?)?[Nn](ote)?(\.)? + > instead of + > (((F|f)oot(N|n)ote)|((N|n)ote)|((N|n)\.)|((F|f)(N|n)(\.))) + > (in a very complicated flex program) caused the program to slow from + > 300K+/min to 100K/min (no other changes were done). + + These two are not equivalent. For example, the first can match "footnote." + but the second can only match "footnote". This is almost certainly the + cause in the discrepancy - the slower scanner run is matching more tokens, + and/or having to do more backing up. + + > 2. Which of these two are better: [Ff]oot or (F|f)oot ? + + From a performance point of view, they're equivalent (modulo presumably + minor effects such as memory cache hit rates; and the presence of trailing + context, see below). From a space point of view, the first is slightly + preferable. + + > 3. I have a pattern that look like this: + > pats {p1}|{p2}|{p3}|...|{p50} (50 patterns ORd) + > + > running yet another complicated program that includes the following rule: + > <snext>{and}/{no4}{bb}{pats} + > + > gets me to "too complicated - over 32,000 states"... + + I can't tell from this example whether the trailing context is variable-length + or fixed-length (it could be the latter if {and} is fixed-length). 
If it's + variable length, which flex -p will tell you, then this reflects a basic + performance problem, and if you can eliminate it by restructuring your + scanner, you will see significant improvement. + + > so I divided {pats} to {pats1}, {pats2},..., {pats5} each consists of about + > 10 patterns and changed the rule to be 5 rules. + > This did compile, but what is the rule of thumb here ? + + The rule is to avoid trailing context other than fixed-length, in which for + a/b, either the 'a' pattern or the 'b' pattern have a fixed length. Use + of the '|' operator automatically makes the pattern variable length, so in + this case '[Ff]oot' is preferred to '(F|f)oot'. + + > 4. I changed a rule that looked like this: + > <snext8>{and}{bb}/{ROMAN}[^A-Za-z] { BEGIN... + > + > to the next 2 rules: + > <snext8>{and}{bb}/{ROMAN}[A-Za-z] { ECHO;} + > <snext8>{and}{bb}/{ROMAN} { BEGIN... + > + > Again, I understand the using [^...] will cause a great performance loss + + Actually, it doesn't cause any sort of performance loss. It's a surprising + fact about regular expressions that they always match in linear time + regardless of how complex they are. + + > but are there any specific rules about it ? + + See the "Performance Considerations" section of the man page, and also + the example in MISC/fastwc/. + + Vern + + +File: flex.info, Node: Is backing up a big deal?, Next: Can I fake multi-byte character support?, Prev: Are certain equivalent patterns faster than others?, Up: FAQ + +Is backing up a big deal? +========================= + + To: Adoram Rogel <adoram@hybridge.com> + Subject: Re: Flex 2.5.2 performance questions + In-reply-to: Your message of Thu, 19 Sep 96 10:16:04 EDT. + Date: Thu, 19 Sep 96 09:58:00 PDT + From: Vern Paxson <vern> + + > a lot about the backing up problem. + > I believe that there lies my biggest problem, and I'll try to improve + > it. + + Since you have variable trailing context, this is a bigger performance + problem. 
Fixing it is usually easier than fixing backing up, which in a + complicated scanner (yours seems to fit the bill) can be extremely + difficult to do correctly. + + You also don't mention what flags you are using for your scanner. + -f makes a large speed difference, and -Cfe buys you nearly as much + speed but the resulting scanner is considerably smaller. + + > I have an | operator in {and} and in {pats} so both of them are variable + > length. + + -p should have reported this. + + > Is changing one of them to fixed-length is enough ? + + Yes. + + > Is it possible to change the 32,000 states limit ? + + Yes. I've appended instructions on how. Before you make this change, + though, you should think about whether there are ways to fundamentally + simplify your scanner - those are certainly preferable! + + Vern + + To increase the 32K limit (on a machine with 32 bit integers), you increase + the magnitude of the following in flexdef.h: + + #define JAMSTATE -32766 /* marks a reference to the state that always jams */ + #define MAXIMUM_MNS 31999 + #define BAD_SUBSCRIPT -32767 + #define MAX_SHORT 32700 + + Adding a 0 or two after each should do the trick. + + +File: flex.info, Node: Can I fake multi-byte character support?, Next: deleteme01, Prev: Is backing up a big deal?, Up: FAQ + +Can I fake multi-byte character support? +======================================== + + To: Heeman_Lee@hp.com + Subject: Re: flex - multi-byte support? + In-reply-to: Your message of Thu, 03 Oct 1996 17:24:04 PDT. + Date: Fri, 04 Oct 1996 11:42:18 PDT + From: Vern Paxson <vern> + + > I assume as long as my *.l file defines the + > range of expected character code values (in octal format), flex will + > scan the file and read multi-byte characters correctly. But I have no + > confidence in this assumption. + + Your lack of confidence is justified - this won't work. + + Flex has in it a widespread assumption that the input is processed + one byte at a time. 
Fixing this is on the to-do list, but is involved, + so it won't happen any time soon. In the interim, the best I can suggest + (unless you want to try fixing it yourself) is to write your rules in + terms of pairs of bytes, using definitions in the first section: + + X \xfe\xc2 + ... + %% + foo{X}bar found_foo_fe_c2_bar(); + + etc. Definitely a pain - sorry about that. + + By the way, the email address you used for me is ancient, indicating you + have a very old version of flex. You can get the most recent, 2.5.4, from + ftp.ee.lbl.gov. + + Vern + + +File: flex.info, Node: deleteme01, Next: Can you discuss some flex internals?, Prev: Can I fake multi-byte character support?, Up: FAQ + +deleteme01 +========== + + To: moleary@primus.com + Subject: Re: Flex / Unicode compatibility question + In-reply-to: Your message of Tue, 22 Oct 1996 10:15:42 PDT. + Date: Tue, 22 Oct 1996 11:06:13 PDT + From: Vern Paxson <vern> + + Unfortunately flex at the moment has a widespread assumption within it + that characters are processed 8 bits at a time. I don't see any easy + fix for this (other than writing your rules in terms of double characters - + a pain). I also don't know of a wider lex, though you might try surfing + the Plan 9 stuff because I know it's a Unicode system, and also the PCCT + toolkit (try searching say Alta Vista for "Purdue Compiler Construction + Toolkit"). + + Fixing flex to handle wider characters is on the long-term to-do list. + But since flex is a strictly spare-time project these days, this probably + won't happen for quite a while, unless someone else does it first. + + Vern + + +File: flex.info, Node: Can you discuss some flex internals?, Next: unput() messes up yy_at_bol, Prev: deleteme01, Up: FAQ + +Can you discuss some flex internals? +==================================== + + To: Johan Linde <jl@theophys.kth.se> + Subject: Re: translation of flex + In-reply-to: Your message of Sun, 10 Nov 1996 09:16:36 PST. 
+ Date: Mon, 11 Nov 1996 10:33:50 PST + From: Vern Paxson <vern> + + > I'm working for the Swedish team translating GNU program, and I'm currently + > working with flex. I have a few questions about some of the messages which + > I hope you can answer. + + All of the things you're wondering about, by the way, concerning flex + internals - probably the only person who understands what they mean in + English is me! So I wouldn't worry too much about getting them right. + That said ... + + > #: main.c:545 + > msgid " %d protos created\n" + > + > Does proto mean prototype? + + Yes - prototypes of state compression tables. + + > #: main.c:539 + > msgid " %d/%d (peak %d) template nxt-chk entries created\n" + > + > Here I'm mainly puzzled by 'nxt-chk'. I guess it means 'next-check'. (?) + > However, 'template next-check entries' doesn't make much sense to me. To be + > able to find a good translation I need to know a little bit more about it. + + There is a scheme in the Aho/Sethi/Ullman compiler book for compressing + scanner tables. It involves creating two pairs of tables. The first has + "base" and "default" entries, the second has "next" and "check" entries. + The "base" entry is indexed by the current state and yields an index into + the next/check table. The "default" entry gives what to do if the state + transition isn't found in next/check. The "next" entry gives the next + state to enter, but only if the "check" entry verifies that this entry is + correct for the current state. Flex creates templates of series of + next/check entries and then encodes differences from these templates as a + way to compress the tables. + + > #: main.c:533 + > msgid " %d/%d base-def entries created\n" + > + > The same problem here for 'base-def'. + + See above. 
+ + Vern + + +File: flex.info, Node: unput() messes up yy_at_bol, Next: The | operator is not doing what I want, Prev: Can you discuss some flex internals?, Up: FAQ + +unput() messes up yy_at_bol +=========================== + + To: Xinying Li <xli@npac.syr.edu> + Subject: Re: FLEX ? + In-reply-to: Your message of Wed, 13 Nov 1996 17:28:38 PST. + Date: Wed, 13 Nov 1996 19:51:54 PST + From: Vern Paxson <vern> + + > "unput()" them to input flow, question occurs. If I do this after I scan + > a carriage, the variable "YY_CURRENT_BUFFER->yy_at_bol" is changed. That + > means the carriage flag has gone. + + You can control this by calling yy_set_bol(). It's described in the manual. + + > And if in pre-reading it goes to the end of file, is anything done + > to control the end of curren buffer and end of file? + + No, there's no way to put back an end-of-file. + + > By the way I am using flex 2.5.2 and using the "-l". + + The latest release is 2.5.4, by the way. It fixes some bugs in 2.5.2 and + 2.5.3. You can get it from ftp.ee.lbl.gov. + + Vern + + +File: flex.info, Node: The | operator is not doing what I want, Next: Why can't flex understand this variable trailing context pattern?, Prev: unput() messes up yy_at_bol, Up: FAQ + +The | operator is not doing what I want +======================================= + + To: Alain.ISSARD@st.com + Subject: Re: Start condition with FLEX + In-reply-to: Your message of Mon, 18 Nov 1996 09:45:02 PST. + Date: Mon, 18 Nov 1996 10:41:34 PST + From: Vern Paxson <vern> + + > I am not able to use the start condition scope and to use the | (OR) with + > rules having start conditions. + + The problem is that if you use '|' as a regular expression operator, for + example "a|b" meaning "match either 'a' or 'b'", then it must *not* have + any blanks around it. 
If you instead want the special '|' *action* (which + from your scanner appears to be the case), which is a way of giving two + different rules the same action: + + foo | + bar matched_foo_or_bar(); + + then '|' *must* be separated from the first rule by whitespace and *must* + be followed by a new line. You *cannot* write it as: + + foo | bar matched_foo_or_bar(); + + even though you might think you could because yacc supports this syntax. + The reason for this unfortunate incompatibility is historical, but it's + unlikely to be changed. + + Your problems with start condition scope are simply due to syntax errors + from your use of '|' later confusing flex. + + Let me know if you still have problems. + + Vern + + +File: flex.info, Node: Why can't flex understand this variable trailing context pattern?, Next: The ^ operator isn't working, Prev: The | operator is not doing what I want, Up: FAQ + +Why can't flex understand this variable trailing context pattern? +================================================================= + + To: Gregory Margo <gmargo@newton.vip.best.com> + Subject: Re: flex-2.5.3 bug report + In-reply-to: Your message of Sat, 23 Nov 1996 16:50:09 PST. + Date: Sat, 23 Nov 1996 17:07:32 PST + From: Vern Paxson <vern> + + > Enclosed is a lex file that "real" lex will process, but I cannot get + > flex to process it. Could you try it and maybe point me in the right direction? + + Your problem is that some of the definitions in the scanner use the '/' + trailing context operator, and have it enclosed in ()'s. Flex does not + allow this operator to be enclosed in ()'s because doing so allows undefined + regular expressions such as "(a/b)+". So the solution is to remove the + parentheses. Note that you must also be building the scanner with the -l + option for AT&T lex compatibility. Without this option, flex automatically + encloses the definitions in parentheses.
+ + Vern + + +File: flex.info, Node: The ^ operator isn't working, Next: Trailing context is getting confused with trailing optional patterns, Prev: Why can't flex understand this variable trailing context pattern?, Up: FAQ + +The ^ operator isn't working +============================ + + To: Thomas Hadig <hadig@toots.physik.rwth-aachen.de> + Subject: Re: Flex Bug ? + In-reply-to: Your message of Tue, 26 Nov 1996 14:35:01 PST. + Date: Tue, 26 Nov 1996 11:15:05 PST + From: Vern Paxson <vern> + + > In my lexer code, i have the line : + > ^\*.* { } + > + > Thus all lines starting with an astrix (*) are comment lines. + > This does not work ! + + I can't get this problem to reproduce - it works fine for me. Note + though that if what you have is slightly different: + + COMMENT ^\*.* + %% + {COMMENT} { } + + then it won't work, because flex pushes back macro definitions enclosed + in ()'s, so the rule becomes + + (^\*.*) { } + + and now that the '^' operator is not at the immediate beginning of the + line, it's interpreted as just a regular character. You can avoid this + behavior by using the "-l" lex-compatibility flag, or "%option lex-compat". + + Vern + + +File: flex.info, Node: Trailing context is getting confused with trailing optional patterns, Next: Is flex GNU or not?, Prev: The ^ operator isn't working, Up: FAQ + +Trailing context is getting confused with trailing optional patterns +==================================================================== + + To: Adoram Rogel <adoram@hybridge.com> + Subject: Re: Flex 2.5.4 BOF ??? + In-reply-to: Your message of Tue, 26 Nov 1996 16:10:41 PST. + Date: Wed, 27 Nov 1996 10:56:25 PST + From: Vern Paxson <vern> + + > Organization(s)?/[a-z] + > + > This matched "Organizations" (looking in debug mode, the trailing s + > was matched with trailing context instead of the optional (s) in the + > end of the word. + + That should only happen with lex. Flex can properly match this pattern. 
+ (That might be what you're saying, I'm just not sure.) + + > Is there a way to avoid this dangerous trailing context problem ? + + Unfortunately, there's no easy way. On the other hand, I don't see why + it should be a problem. Lex's matching is clearly wrong, and I'd hope + that usually the intent remains the same as expressed with the pattern, + so flex's matching will be correct. + + Vern + + +File: flex.info, Node: Is flex GNU or not?, Next: ERASEME53, Prev: Trailing context is getting confused with trailing optional patterns, Up: FAQ + +Is flex GNU or not? +=================== + + To: Cameron MacKinnon <mackin@interlog.com> + Subject: Re: Flex documentation bug + In-reply-to: Your message of Mon, 02 Dec 1996 00:07:08 PST. + Date: Sun, 01 Dec 1996 22:29:39 PST + From: Vern Paxson <vern> + + > I'm not sure how or where to submit bug reports (documentation or + > otherwise) for the GNU project stuff ... + + Well, strictly speaking flex isn't part of the GNU project. They just + distribute it because no one's written a decent GPL'd lex replacement. + So you should send bugs directly to me. Those sent to the GNU folks + sometimes find their way to me, but some may drop between the cracks. + + > In GNU Info, under the section 'Start Conditions', and also in the man + > page (mine's dated April '95) is a nice little snippet showing how to + > parse C quoted strings into a buffer, defined to be MAX_STR_CONST in + > size. Unfortunately, no overflow checking is ever done ... + + This is already mentioned in the manual: + + Finally, here's an example of how to match C-style quoted + strings using exclusive start conditions, including expanded + escape sequences (but not including checking for a string + that's too long): + + The reason for not doing the overflow checking is that it will needlessly + clutter up an example whose main purpose is just to demonstrate how to + use flex. + + The latest release is 2.5.4, by the way, available from ftp.ee.lbl.gov.
+ + Vern + + +File: flex.info, Node: ERASEME53, Next: I need to scan if-then-else blocks and while loops, Prev: Is flex GNU or not?, Up: FAQ + +ERASEME53 +========= + + To: tsv@cs.UManitoba.CA + Subject: Re: Flex (reg).. + In-reply-to: Your message of Thu, 06 Mar 1997 23:50:16 PST. + Date: Thu, 06 Mar 1997 15:54:19 PST + From: Vern Paxson <vern> + + > [:alpha:] ([:alnum:] | \\_)* + + If your rule really has embedded blanks as shown above, then it won't + work, as the first blank delimits the rule from the action. (It wouldn't + even compile ...) You need instead: + + [:alpha:]([:alnum:]|\\_)* + + and that should work fine - there's no restriction on what can go inside + of ()'s except for the trailing context operator, '/'. + + Vern + + +File: flex.info, Node: I need to scan if-then-else blocks and while loops, Next: ERASEME55, Prev: ERASEME53, Up: FAQ + +I need to scan if-then-else blocks and while loops +================================================== + + To: "Mike Stolnicki" <mstolnic@ford.com> + Subject: Re: FLEX help + In-reply-to: Your message of Fri, 30 May 1997 13:33:27 PDT. + Date: Fri, 30 May 1997 10:46:35 PDT + From: Vern Paxson <vern> + + > We'd like to add "if-then-else", "while", and "for" statements to our + > language ... + > We've investigated many possible solutions. The one solution that seems + > the most reasonable involves knowing the position of a TOKEN in yyin. + + I strongly advise you to instead build a parse tree (abstract syntax tree) + and loop over that instead. You'll find this has major benefits in keeping + your interpreter simple and extensible. + + That said, the functionality you mention for get_position and set_position + have been on the to-do list for a while. As flex is a purely spare-time + project for me, no guarantees when this will be added (in particular, it + for sure won't be for many months to come). 
+ + Vern + + +File: flex.info, Node: ERASEME55, Next: ERASEME56, Prev: I need to scan if-then-else blocks and while loops, Up: FAQ + +ERASEME55 +========= + + To: Colin Paul Adams <colin@colina.demon.co.uk> + Subject: Re: Flex C++ classes and Bison + In-reply-to: Your message of 09 Aug 1997 17:11:41 PDT. + Date: Fri, 15 Aug 1997 10:48:19 PDT + From: Vern Paxson <vern> + + > #define YY_DECL int yylex (YYSTYPE *lvalp, struct parser_control + > *parm) + > + > I have been trying to get this to work as a C++ scanner, but it does + > not appear to be possible (warning that it matches no declarations in + > yyFlexLexer, or something like that). + > + > Is this supposed to be possible, or is it being worked on (I DID + > notice the comment that scanner classes are still experimental, so I'm + > not too hopeful)? + + What you need to do is derive a subclass from yyFlexLexer that provides + the above yylex() method, squirrels away lvalp and parm into member + variables, and then invokes yyFlexLexer::yylex() to do the regular scanning. + + Vern + + +File: flex.info, Node: ERASEME56, Next: ERASEME57, Prev: ERASEME55, Up: FAQ + +ERASEME56 +========= + + To: Mikael.Latvala@lmf.ericsson.se + Subject: Re: Possible mistake in Flex v2.5 document + In-reply-to: Your message of Fri, 05 Sep 1997 16:07:24 PDT. + Date: Fri, 05 Sep 1997 10:01:54 PDT + From: Vern Paxson <vern> + + > In that example you show how to count comment lines when using + > C style /* ... */ comments. My question is, shouldn't you take into + > account a scenario where end of a comment marker occurs inside + > character or string literals? + + The scanner certainly needs to also scan character and string literals. + However it does that (there's an example in the man page for strings), the + lexer will recognize the beginning of the literal before it runs across the + embedded "/*". Consequently, it will finish scanning the literal before it + even considers the possibility of matching "/*". 
+ + Example: + + '([^']*|{ESCAPE_SEQUENCE})' + + will match all the text between the ''s (inclusive). So the lexer + considers this as a token beginning at the first ', and doesn't even + attempt to match other tokens inside it. + + I think this subtlety is not worth putting in the manual, as I suspect + it would confuse more people than it would enlighten. + + Vern + + +File: flex.info, Node: ERASEME57, Next: Is there a repository for flex scanners?, Prev: ERASEME56, Up: FAQ + +ERASEME57 +========= + + To: "Marty Leisner" <leisner@sdsp.mc.xerox.com> + Subject: Re: flex limitations + In-reply-to: Your message of Sat, 06 Sep 1997 11:27:21 PDT. + Date: Mon, 08 Sep 1997 11:38:08 PDT + From: Vern Paxson <vern> + + > %% + > [a-zA-Z]+ /* skip a line */ + > { printf("got %s\n", yytext); } + > %% + + What version of flex are you using? If I feed this to 2.5.4, it complains: + + "bug.l", line 5: EOF encountered inside an action + "bug.l", line 5: unrecognized rule + "bug.l", line 5: fatal parse error + + Not the world's greatest error message, but it manages to flag the problem. + + (With the introduction of start condition scopes, flex can't accommodate + an action on a separate line, since it's ambiguous with an indented rule.) + + You can get 2.5.4 from ftp.ee.lbl.gov. + + Vern + + +File: flex.info, Node: Is there a repository for flex scanners?, Next: How can I conditionally compile or preprocess my flex input file?, Prev: ERASEME57, Up: FAQ + +Is there a repository for flex scanners? +======================================== + +Not that we know of. You might try asking on comp.compilers. + + +File: flex.info, Node: How can I conditionally compile or preprocess my flex input file?, Next: Where can I find grammars for lex and yacc?, Prev: Is there a repository for flex scanners?, Up: FAQ + +How can I conditionally compile or preprocess my flex input file? +================================================================= + +Flex doesn't have a preprocessor like C does.
You might try using m4, +or the C preprocessor plus a sed script to clean up the result. + + +File: flex.info, Node: Where can I find grammars for lex and yacc?, Next: I get an end-of-buffer message for each character scanned., Prev: How can I conditionally compile or preprocess my flex input file?, Up: FAQ + +Where can I find grammars for lex and yacc? +=========================================== + +In the sources for flex and bison. + + +File: flex.info, Node: I get an end-of-buffer message for each character scanned., Next: unnamed-faq-62, Prev: Where can I find grammars for lex and yacc?, Up: FAQ + +I get an end-of-buffer message for each character scanned. +========================================================== + +This will happen if your LexerInput() function returns only one +character at a time, which can happen either if your scanner is +"interactive", or if the streams library on your platform always returns +1 for yyin->gcount(). + + Solution: override LexerInput() with a version that returns whole +buffers. + + +File: flex.info, Node: unnamed-faq-62, Next: unnamed-faq-63, Prev: I get an end-of-buffer message for each character scanned., Up: FAQ + +unnamed-faq-62 +============== + + To: Georg.Rehm@CL-KI.Uni-Osnabrueck.DE + Subject: Re: Flex maximums + In-reply-to: Your message of Mon, 17 Nov 1997 17:16:06 PST. 
+ Date: Mon, 17 Nov 1997 17:16:15 PST + From: Vern Paxson <vern> + + > I took a quick look into the flex-sources and altered some #defines in + > flexdefs.h: + > + > #define INITIAL_MNS 64000 + > #define MNS_INCREMENT 1024000 + > #define MAXIMUM_MNS 64000 + + The things to fix are to add a couple of zeroes to: + + #define JAMSTATE -32766 /* marks a reference to the state that always jams */ + #define MAXIMUM_MNS 31999 + #define BAD_SUBSCRIPT -32767 + #define MAX_SHORT 32700 + + and, if you get complaints about too many rules, make the following change too: + + #define YY_TRAILING_MASK 0x200000 + #define YY_TRAILING_HEAD_MASK 0x400000 + + - Vern + + +File: flex.info, Node: unnamed-faq-63, Next: unnamed-faq-64, Prev: unnamed-faq-62, Up: FAQ + +unnamed-faq-63 +============== + + To: jimmey@lexis-nexis.com (Jimmey Todd) + Subject: Re: FLEX question regarding istream vs ifstream + In-reply-to: Your message of Mon, 08 Dec 1997 15:54:15 PST. + Date: Mon, 15 Dec 1997 13:21:35 PST + From: Vern Paxson <vern> + + > stdin_handle = YY_CURRENT_BUFFER; + > ifstream fin( "aFile" ); + > yy_switch_to_buffer( yy_create_buffer( fin, YY_BUF_SIZE ) ); + > + > What I'm wanting to do, is pass the contents of a file thru one set + > of rules and then pass stdin thru another set... It works great if, I + > don't use the C++ classes. But since everything else that I'm doing is + > in C++, I thought I'd be consistent. + > + > The problem is that 'yy_create_buffer' is expecting an istream* as it's + > first argument (as stated in the man page). However, fin is a ifstream + > object. Any ideas on what I might be doing wrong? Any help would be + > appreciated. Thanks!! + + You need to pass &fin, to turn it into an ifstream* instead of an ifstream. + Then its type will be compatible with the expected istream*, because ifstream + is derived from istream. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-64, Next: unnamed-faq-65, Prev: unnamed-faq-63, Up: FAQ + +unnamed-faq-64 +============== + + To: Enda Fadian <fadiane@piercom.ie> + Subject: Re: Question related to Flex man page? + In-reply-to: Your message of Tue, 16 Dec 1997 15:17:34 PST. + Date: Tue, 16 Dec 1997 14:17:09 PST + From: Vern Paxson <vern> + + > Can you explain to me what is ment by a long-jump in relation to flex? + + Using the longjmp() function while inside yylex() or a routine called by it. + + > what is the flex activation frame. + + Just yylex()'s stack frame. + + > As far as I can see yyrestart will bring me back to the sart of the input + > file and using flex++ isnot really an option! + + No, yyrestart() doesn't imply a rewind, even though its name might sound + like it does. It tells the scanner to flush its internal buffers and + start reading from the given file at its present location. + + Vern + + +File: flex.info, Node: unnamed-faq-65, Next: unnamed-faq-66, Prev: unnamed-faq-64, Up: FAQ + +unnamed-faq-65 +============== + + To: hassan@larc.info.uqam.ca (Hassan Alaoui) + Subject: Re: Need urgent Help + In-reply-to: Your message of Sat, 20 Dec 1997 19:38:19 PST. + Date: Sun, 21 Dec 1997 21:30:46 PST + From: Vern Paxson <vern> + + > /usr/lib/yaccpar: In function `int yyparse()': + > /usr/lib/yaccpar:184: warning: implicit declaration of function `int yylex(...)' + > + > ld: Undefined symbol + > _yylex + > _yyparse + > _yyin + + This is a known problem with Solaris C++ (and/or Solaris yacc). I believe + the fix is to explicitly insert some 'extern "C"' statements for the + corresponding routines/symbols. + + Vern + + +File: flex.info, Node: unnamed-faq-66, Next: unnamed-faq-67, Prev: unnamed-faq-65, Up: FAQ + +unnamed-faq-66 +============== + + To: mc0307@mclink.it + Cc: gnu@prep.ai.mit.edu + Subject: Re: [mc0307@mclink.it: Help request] + In-reply-to: Your message of Fri, 12 Dec 1997 17:57:29 PST. 
+ Date: Sun, 21 Dec 1997 22:33:37 PST + From: Vern Paxson <vern> + + > This is my definition for float and integer types: + > . . . + > NZD [1-9] + > ... + > I've tested my program on other lex version (on UNIX Sun Solaris an HP + > UNIX) and it work well, so I think that my definitions are correct. + > There are any differences between Lex and Flex? + + There are indeed differences, as discussed in the man page. The one + you are probably running into is that when flex expands a name definition, + it puts parentheses around the expansion, while lex does not. There's + an example in the man page of how this can lead to different matching. + Flex's behavior complies with the POSIX standard (or at least with the + last POSIX draft I saw). + + Vern + + +File: flex.info, Node: unnamed-faq-67, Next: unnamed-faq-68, Prev: unnamed-faq-66, Up: FAQ + +unnamed-faq-67 +============== + + To: hassan@larc.info.uqam.ca (Hassan Alaoui) + Subject: Re: Thanks + In-reply-to: Your message of Mon, 22 Dec 1997 16:06:35 PST. + Date: Mon, 22 Dec 1997 14:35:05 PST + From: Vern Paxson <vern> + + > Thank you very much for your help. I compile and link well with C++ while + > declaring 'yylex ...' extern, But a little problem remains. I get a + > segmentation default when executing ( I linked with lfl library) while it + > works well when using LEX instead of flex. Do you have some ideas about the + > reason for this ? + + The one possible reason for this that comes to mind is if you've defined + yytext as "extern char yytext[]" (which is what lex uses) instead of + "extern char *yytext" (which is what flex uses). If it's not that, then + I'm afraid I don't know what the problem might be. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-68, Next: unnamed-faq-69, Prev: unnamed-faq-67, Up: FAQ + +unnamed-faq-68 +============== + + To: "Bart Niswonger" <NISWONGR@almaden.ibm.com> + Subject: Re: flex 2.5: c++ scanners & start conditions + In-reply-to: Your message of Tue, 06 Jan 1998 10:34:21 PST. + Date: Tue, 06 Jan 1998 19:19:30 PST + From: Vern Paxson <vern> + + > The problem is that when I do this (using %option c++) start + > conditions seem to not apply. + + The BEGIN macro modifies the yy_start variable. For C scanners, this + is a static with scope visible through the whole file. For C++ scanners, + it's a member variable, so it only has visible scope within a member + function. Your lexbegin() routine is not a member function when you + build a C++ scanner, so it's not modifying the correct yy_start. The + diagnostic that indicates this is that you found you needed to add + a declaration of yy_start in order to get your scanner to compile when + using C++; instead, the correct fix is to make lexbegin() a member + function (by deriving from yyFlexLexer). + + Vern + + +File: flex.info, Node: unnamed-faq-69, Next: unnamed-faq-70, Prev: unnamed-faq-68, Up: FAQ + +unnamed-faq-69 +============== + + To: "Boris Zinin" <boris@ippe.rssi.ru> + Subject: Re: current position in flex buffer + In-reply-to: Your message of Mon, 12 Jan 1998 18:58:23 PST. + Date: Mon, 12 Jan 1998 12:03:15 PST + From: Vern Paxson <vern> + + > The problem is how to determine the current position in flex active + > buffer when a rule is matched.... + + You will need to keep track of this explicitly, such as by redefining + YY_USER_ACTION to count the number of characters matched. + + The latest flex release, by the way, is 2.5.4, available from ftp.ee.lbl.gov. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-70, Next: unnamed-faq-71, Prev: unnamed-faq-69, Up: FAQ + +unnamed-faq-70 +============== + + To: Bik.Dhaliwal@bis.org + Subject: Re: Flex question + In-reply-to: Your message of Mon, 26 Jan 1998 13:05:35 PST. + Date: Tue, 27 Jan 1998 22:41:52 PST + From: Vern Paxson <vern> + + > That requirement involves knowing + > the character position at which a particular token was matched + > in the lexer. + + The way you have to do this is by explicitly keeping track of where + you are in the file, by counting the number of characters scanned + for each token (available in yyleng). It may prove convenient to + do this by redefining YY_USER_ACTION, as described in the manual. + + Vern + + +File: flex.info, Node: unnamed-faq-71, Next: unnamed-faq-72, Prev: unnamed-faq-70, Up: FAQ + +unnamed-faq-71 +============== + + To: Vladimir Alexiev <vladimir@cs.ualberta.ca> + Subject: Re: flex: how to control start condition from parser? + In-reply-to: Your message of Mon, 26 Jan 1998 05:50:16 PST. + Date: Tue, 27 Jan 1998 22:45:37 PST + From: Vern Paxson <vern> + + > It seems useful for the parser to be able to tell the lexer about such + > context dependencies, because then they don't have to be limited to + > local or sequential context. + + One way to do this is to have the parser call a stub routine that's + included in the scanner's .l file, and consequently that has access to + BEGIN. The only ugliness is that the parser can't pass in the state + it wants, because those aren't visible - but if you don't have many + such states, then using a different set of names doesn't seem like + too much of a burden. + + While generating a .h file like you suggest is certainly cleaner, + flex development has come to a virtual stand-still :-(, so a workaround + like the above is much more pragmatic than waiting for a new feature. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-72, Next: unnamed-faq-73, Prev: unnamed-faq-71, Up: FAQ + +unnamed-faq-72 +============== + + To: Barbara Denny <denny@3com.com> + Subject: Re: freebsd flex bug? + In-reply-to: Your message of Fri, 30 Jan 1998 12:00:43 PST. + Date: Fri, 30 Jan 1998 12:42:32 PST + From: Vern Paxson <vern> + + > lex.yy.c:1996: parse error before `=' + + This is the key, identifying this error. (It may help to pinpoint + it by using flex -L, so it doesn't generate #line directives in its + output.) I will bet you heavy money that you have a start condition + name that is also a variable name, or something like that; flex spits + out #define's for each start condition name, mapping them to a number, + so you can wind up with: + + %x foo + %% + ... + %% + void bar() + { + int foo = 3; + } + + and the penultimate will turn into "int 1 = 3" after C preprocessing, + since flex will put "#define foo 1" in the generated scanner. + + Vern + + +File: flex.info, Node: unnamed-faq-73, Next: unnamed-faq-74, Prev: unnamed-faq-72, Up: FAQ + +unnamed-faq-73 +============== + + To: Maurice Petrie <mpetrie@infoscigroup.com> + Subject: Re: Lost flex .l file + In-reply-to: Your message of Mon, 02 Feb 1998 14:10:01 PST. + Date: Mon, 02 Feb 1998 11:15:12 PST + From: Vern Paxson <vern> + + > I am curious as to + > whether there is a simple way to backtrack from the generated source to + > reproduce the lost list of tokens we are searching on. + + In theory, it's straight-forward to go from the DFA representation + back to a regular-expression representation - the two are isomorphic. + In practice, a huge headache, because you have to unpack all the tables + back into a single DFA representation, and then write a program to munch + on that and translate it into an RE. + + Sorry for the less-than-happy news ... 
+ + Vern + + +File: flex.info, Node: unnamed-faq-74, Next: unnamed-faq-75, Prev: unnamed-faq-73, Up: FAQ + +unnamed-faq-74 +============== + + To: jimmey@lexis-nexis.com (Jimmey Todd) + Subject: Re: Flex performance question + In-reply-to: Your message of Thu, 19 Feb 1998 11:01:17 PST. + Date: Thu, 19 Feb 1998 08:48:51 PST + From: Vern Paxson <vern> + + > What I have found, is that the smaller the data chunk, the faster the + > program executes. This is the opposite of what I expected. Should this be + > happening this way? + + This is exactly what will happen if your input file has embedded NULs. + From the man page: + + A final note: flex is slow when matching NUL's, particularly + when a token contains multiple NUL's. It's best to write + rules which match short amounts of text if it's anticipated + that the text will often include NUL's. + + So that's the first thing to look for. + + Vern + + +File: flex.info, Node: unnamed-faq-75, Next: unnamed-faq-76, Prev: unnamed-faq-74, Up: FAQ + +unnamed-faq-75 +============== + + To: jimmey@lexis-nexis.com (Jimmey Todd) + Subject: Re: Flex performance question + In-reply-to: Your message of Thu, 19 Feb 1998 11:01:17 PST. + Date: Thu, 19 Feb 1998 15:42:25 PST + From: Vern Paxson <vern> + + So there are several problems. + + First, to go fast, you want to match as much text as possible, which + your scanners don't in the case that what they're scanning is *not* + a <RN> tag. So you want a rule like: + + [^<]+ + + Second, C++ scanners are particularly slow if they're interactive, + which they are by default. Using -B speeds it up by a factor of 3-4 + on my workstation. + + Third, C++ scanners that use the istream interface are slow, because + of how poorly implemented istream's are. I built two versions of + the following scanner: + + %% + .*\n + .* + %% + + and the C version inhales a 2.5MB file on my workstation in 0.8 seconds. + The C++ istream version, using -B, takes 3.8 seconds. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-76, Next: unnamed-faq-77, Prev: unnamed-faq-75, Up: FAQ + +unnamed-faq-76 +============== + + To: "Frescatore, David (CRD, TAD)" <frescatore@exc01crdge.crd.ge.com> + Subject: Re: FLEX 2.5 & THE YEAR 2000 + In-reply-to: Your message of Wed, 03 Jun 1998 11:26:22 PDT. + Date: Wed, 03 Jun 1998 10:22:26 PDT + From: Vern Paxson <vern> + + > I am researching the Y2K problem with General Electric R&D + > and need to know if there are any known issues concerning + > the above mentioned software and Y2K regardless of version. + + There shouldn't be, all it ever does with the date is ask the system + for it and then print it out. + + Vern + + +File: flex.info, Node: unnamed-faq-77, Next: unnamed-faq-78, Prev: unnamed-faq-76, Up: FAQ + +unnamed-faq-77 +============== + + To: "Hans Dermot Doran" <htd@ibhdoran.com> + Subject: Re: flex problem + In-reply-to: Your message of Wed, 15 Jul 1998 21:30:13 PDT. + Date: Tue, 21 Jul 1998 14:23:34 PDT + From: Vern Paxson <vern> + + > To overcome this, I gets() the stdin into a string and lex the string. The + > string is lexed OK except that the end of string isn't lexed properly + > (yy_scan_string()), that is the lexer dosn't recognise the end of string. + + Flex doesn't contain mechanisms for recognizing buffer endpoints. But if + you use fgets instead (which you should anyway, to protect against buffer + overflows), then the final \n will be preserved in the string, and you can + scan that in order to find the end of the string. + + Vern + + +File: flex.info, Node: unnamed-faq-78, Next: unnamed-faq-79, Prev: unnamed-faq-77, Up: FAQ + +unnamed-faq-78 +============== + + To: soumen@almaden.ibm.com + Subject: Re: Flex++ 2.5.3 instance member vs. static member + In-reply-to: Your message of Mon, 27 Jul 1998 02:10:04 PDT. 
+ Date: Tue, 28 Jul 1998 01:10:34 PDT + From: Vern Paxson <vern> + + > %{ + > int mylineno = 0; + > %} + > ws [ \t]+ + > alpha [A-Za-z] + > dig [0-9] + > %% + > + > Now you'd expect mylineno to be a member of each instance of class + > yyFlexLexer, but is this the case? A look at the lex.yy.cc file seems to + > indicate otherwise; unless I am missing something the declaration of + > mylineno seems to be outside any class scope. + > + > How will this work if I want to run a multi-threaded application with each + > thread creating a FlexLexer instance? + + Derive your own subclass and make mylineno a member variable of it. + + Vern + + +File: flex.info, Node: unnamed-faq-79, Next: unnamed-faq-80, Prev: unnamed-faq-78, Up: FAQ + +unnamed-faq-79 +============== + + To: Adoram Rogel <adoram@hybridge.com> + Subject: Re: More than 32K states change hangs + In-reply-to: Your message of Tue, 04 Aug 1998 16:55:39 PDT. + Date: Tue, 04 Aug 1998 22:28:45 PDT + From: Vern Paxson <vern> + + > Vern Paxson, + > + > I followed your advice, posted on Usenet bu you, and emailed to me + > personally by you, on how to overcome the 32K states limit. I'm running + > on Linux machines. + > I took the full source of version 2.5.4 and did the following changes in + > flexdef.h: + > #define JAMSTATE -327660 + > #define MAXIMUM_MNS 319990 + > #define BAD_SUBSCRIPT -327670 + > #define MAX_SHORT 327000 + > + > and compiled. + > All looked fine, including check and bigcheck, so I installed. + + Hmmm, you shouldn't increase MAX_SHORT, though looking through my email + archives I see that I did indeed recommend doing so. Try setting it back + to 32700; that should suffice that you no longer need -Ca. If it still + hangs, then the interesting question is - where? + + > Compiling the same hanged program with a out-of-the-box (RedHat 4.2 + > distribution of Linux) + > flex 2.5.4 binary works. 
+ + Since Linux comes with source code, you should diff it against what + you have to see what problems they missed. + + > Should I always compile with the -Ca option now ? even short and simple + > filters ? + + No, definitely not. It's meant to be for those situations where you + absolutely must squeeze every last cycle out of your scanner. + + Vern + + +File: flex.info, Node: unnamed-faq-80, Next: unnamed-faq-81, Prev: unnamed-faq-79, Up: FAQ + +unnamed-faq-80 +============== + + To: "Schmackpfeffer, Craig" <Craig.Schmackpfeffer@usa.xerox.com> + Subject: Re: flex output for static code portion + In-reply-to: Your message of Tue, 11 Aug 1998 11:55:30 PDT. + Date: Mon, 17 Aug 1998 23:57:42 PDT + From: Vern Paxson <vern> + + > I would like to use flex under the hood to generate a binary file + > containing the data structures that control the parse. + + This has been on the wish-list for a long time. In principle it's + straight-forward - you redirect mkdata() et al's I/O to another file, + and modify the skeleton to have a start-up function that slurps these + into dynamic arrays. The concerns are (1) the scanner generation code + is hairy and full of corner cases, so it's easy to get surprised when + going down this path :-( ; and (2) being careful about buffering so + that when the tables change you make sure the scanner starts in the + correct state and reading at the right point in the input file. + + > I was wondering if you know of anyone who has used flex in this way. + + I don't - but it seems like a reasonable project to undertake (unlike + numerous other flex tweaks :-). 
+ + Vern + + +File: flex.info, Node: unnamed-faq-81, Next: unnamed-faq-82, Prev: unnamed-faq-80, Up: FAQ + +unnamed-faq-81 +============== + + Received: from 131.173.17.11 (131.173.17.11 [131.173.17.11]) + by ee.lbl.gov (8.9.1/8.9.1) with ESMTP id AAA03838 + for <vern@ee.lbl.gov>; Thu, 20 Aug 1998 00:47:57 -0700 (PDT) + Received: from hal.cl-ki.uni-osnabrueck.de (hal.cl-ki.Uni-Osnabrueck.DE [131.173.141.2]) + by deimos.rz.uni-osnabrueck.de (8.8.7/8.8.8) with ESMTP id JAA34694 + for <vern@ee.lbl.gov>; Thu, 20 Aug 1998 09:47:55 +0200 + Received: (from georg@localhost) by hal.cl-ki.uni-osnabrueck.de (8.6.12/8.6.12) id JAA34834 for vern@ee.lbl.gov; Thu, 20 Aug 1998 09:47:54 +0200 + From: Georg Rehm <georg@hal.cl-ki.uni-osnabrueck.de> + Message-Id: <199808200747.JAA34834@hal.cl-ki.uni-osnabrueck.de> + Subject: "flex scanner push-back overflow" + To: vern@ee.lbl.gov + Date: Thu, 20 Aug 1998 09:47:54 +0200 (MEST) + Reply-To: Georg.Rehm@CL-KI.Uni-Osnabrueck.DE + X-NoJunk: Do NOT send commercial mail, spam or ads to this address! + X-URL: http://www.cl-ki.uni-osnabrueck.de/~georg/ + X-Mailer: ELM [version 2.4ME+ PL28 (25)] + MIME-Version: 1.0 + Content-Type: text/plain; charset=US-ASCII + Content-Transfer-Encoding: 7bit + + Hi Vern, + + Yesterday, I encountered a strange problem: I use the macro processor m4 + to include some lengthy lists into a .l file. Following is a flex macro + definition that causes some serious pain in my neck: + + AUTHOR ("A. Boucard / L. Boucard"|"A. Dastarac / M. Levent"|"A.Boucaud / L.Boucaud"|"Abderrahim Lamchichi"|"Achmat Dangor"|"Adeline Toullier"|"Adewale Maja-Pearce"|"Ahmed Ziri"|"Akram Ellyas"|"Alain Bihr"|"Alain Gresh"|"Alain Guillemoles"|"Alain Joxe"|"Alain Morice"|"Alain Renon"|"Alain Zecchini"|"Albert Memmi"|"Alberto Manguel"|"Alex De Waal"|"Alfonso Artico"| [...]) + + The complete list contains about 10kB. 
When I try to "flex" this file + (on a Solaris 2.6 machine, using a modified flex 2.5.4 (I only increased + some of the predefined values in flexdefs.h) I get the error: + + myflex/flex -8 sentag.tmp.l + flex scanner push-back overflow + + When I remove the slashes in the macro definition everything works fine. + As I understand it, the double quotes escape the slash-character so it + really means "/" and not "trailing context". Furthermore, I tried to + escape the slashes with backslashes, but with no use, the same error message + appeared when flexing the code. + + Do you have an idea what's going on here? + + Greetings from Germany, + Georg + -- + Georg Rehm georg@cl-ki.uni-osnabrueck.de + Institute for Semantic Information Processing, University of Osnabrueck, FRG + + +File: flex.info, Node: unnamed-faq-82, Next: unnamed-faq-83, Prev: unnamed-faq-81, Up: FAQ + +unnamed-faq-82 +============== + + To: Georg.Rehm@CL-KI.Uni-Osnabrueck.DE + Subject: Re: "flex scanner push-back overflow" + In-reply-to: Your message of Thu, 20 Aug 1998 09:47:54 PDT. + Date: Thu, 20 Aug 1998 07:05:35 PDT + From: Vern Paxson <vern> + + > myflex/flex -8 sentag.tmp.l + > flex scanner push-back overflow + + Flex itself uses a flex scanner. That scanner is running out of buffer + space when it tries to unput() the humongous macro you've defined. When + you remove the '/'s, you make it small enough so that it fits in the buffer; + removing spaces would do the same thing. + + The fix is to either rethink how come you're using such a big macro and + perhaps there's another/better way to do it; or to rebuild flex's own + scan.c with a larger value for + + #define YY_BUF_SIZE 16384 + + - Vern + + +File: flex.info, Node: unnamed-faq-83, Next: unnamed-faq-84, Prev: unnamed-faq-82, Up: FAQ + +unnamed-faq-83 +============== + + To: Jan Kort <jan@research.techforce.nl> + Subject: Re: Flex + In-reply-to: Your message of Fri, 04 Sep 1998 12:18:43 +0200. 
+ Date: Sat, 05 Sep 1998 00:59:49 PDT + From: Vern Paxson <vern> + + > %% + > + > "TEST1\n" { fprintf(stderr, "TEST1\n"); yyless(5); } + > ^\n { fprintf(stderr, "empty line\n"); } + > . { } + > \n { fprintf(stderr, "new line\n"); } + > + > %% + > -- input --------------------------------------- + > TEST1 + > -- output -------------------------------------- + > TEST1 + > empty line + > ------------------------------------------------ + + IMHO, it's not clear whether or not this is in fact a bug. It depends + on whether you view yyless() as backing up in the input stream, or as + pushing new characters onto the beginning of the input stream. Flex + interprets it as the latter (for implementation convenience, I'll admit), + and so considers the newline as in fact matching at the beginning of a + line, as after all the last token scanned an entire line and so the + scanner is now at the beginning of a new line. + + I agree that this is counter-intuitive for yyless(), given its + functional description (it's less so for unput(), depending on whether + you're unput()'ing new text or scanned text). But I don't plan to + change it any time soon, as it's a pain to do so. Consequently, + you do indeed need to use yy_set_bol() and YY_AT_BOL() to tweak + your scanner into the behavior you desire. + + Sorry for the less-than-completely-satisfactory answer. + + Vern + + +File: flex.info, Node: unnamed-faq-84, Next: unnamed-faq-85, Prev: unnamed-faq-83, Up: FAQ + +unnamed-faq-84 +============== + + To: Patrick Krusenotto <krusenot@mac-info-link.de> + Subject: Re: Problems with restarting flex-2.5.2-generated scanner + In-reply-to: Your message of Thu, 24 Sep 1998 10:14:07 PDT. + Date: Thu, 24 Sep 1998 23:28:43 PDT + From: Vern Paxson <vern> + + > I am using flex-2.5.2 and bison 1.25 for Solaris and I am desperately + > trying to make my scanner restart with a new file after my parser stops + > with a parse error. 
When my compiler restarts, the parser always + > receives the token after the token (in the old file!) that caused the + > parser error. + + I suspect the problem is that your parser has read ahead in order + to attempt to resolve an ambiguity, and when it's restarted it picks + up with that token rather than reading a fresh one. If you're using + yacc, then the special "error" production can sometimes be used to + consume tokens in an attempt to get the parser into a consistent state. + + Vern + + +File: flex.info, Node: unnamed-faq-85, Next: unnamed-faq-86, Prev: unnamed-faq-84, Up: FAQ + +unnamed-faq-85 +============== + + To: Henric Jungheim <junghelh@pe-nelson.com> + Subject: Re: flex 2.5.4a + In-reply-to: Your message of Tue, 27 Oct 1998 16:41:42 PST. + Date: Tue, 27 Oct 1998 16:50:14 PST + From: Vern Paxson <vern> + + > This brings up a feature request: How about a command line + > option to specify the filename when reading from stdin? That way one + > doesn't need to create a temporary file in order to get the "#line" + > directives to make sense. + + Use -o combined with -t (per the man page description of -o). + + > P.S., Is there any simple way to use non-blocking IO to parse multiple + > streams? + + Simple, no. + + One approach might be to return a magic character on EWOULDBLOCK and + have a rule + + .*<magic-character> // put back .*, eat magic character + + This is off the top of my head, not sure it'll work. + + Vern + + +File: flex.info, Node: unnamed-faq-86, Next: unnamed-faq-87, Prev: unnamed-faq-85, Up: FAQ + +unnamed-faq-86 +============== + + To: "Repko, Billy D" <billy.d.repko@intel.com> + Subject: Re: Compiling scanners + In-reply-to: Your message of Wed, 13 Jan 1999 10:52:47 PST. + Date: Thu, 14 Jan 1999 00:25:30 PST + From: Vern Paxson <vern> + + > It appears that maybe it cannot find the lfl library. + + The Makefile in the distribution builds it, so you should have it. 
+ It's exceedingly trivial, just a main() that calls yylex() and + a yywrap() that always returns 1. + + > %% + > \n ++num_lines; ++num_chars; + > . ++num_chars; + + You can't indent your rules like this - that's where the errors are coming + from. Flex copies indented text to the output file, it's how you do things + like + + int num_lines_seen = 0; + + to declare local variables. + + Vern + + +File: flex.info, Node: unnamed-faq-87, Next: unnamed-faq-88, Prev: unnamed-faq-86, Up: FAQ + +unnamed-faq-87 +============== + + To: Erick Branderhorst <Erick.Branderhorst@asml.nl> + Subject: Re: flex input buffer + In-reply-to: Your message of Tue, 09 Feb 1999 13:53:46 PST. + Date: Tue, 09 Feb 1999 21:03:37 PST + From: Vern Paxson <vern> + + > In the flex.skl file the size of the default input buffers is set. Can you + > explain why this size is set and why it is such a high number. + + It's large to optimize performance when scanning large files. You can + safely make it a lot lower if needed. + + Vern + + +File: flex.info, Node: unnamed-faq-88, Next: unnamed-faq-90, Prev: unnamed-faq-87, Up: FAQ + +unnamed-faq-88 +============== + + To: "Guido Minnen" <guidomi@cogs.susx.ac.uk> + Subject: Re: Flex error message + In-reply-to: Your message of Wed, 24 Feb 1999 15:31:46 PST. + Date: Thu, 25 Feb 1999 00:11:31 PST + From: Vern Paxson <vern> + + > I'm extending a larger scanner written in Flex and I keep running into + > problems. More specifically, I get the error message: + > "flex: input rules are too complicated (>= 32000 NFA states)" + + Increase the definitions in flexdef.h for: + + #define JAMSTATE -32766 /* marks a reference to the state that always j + ams */ + #define MAXIMUM_MNS 31999 + #define BAD_SUBSCRIPT -32767 + + recompile everything, and it should all work. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-90, Next: unnamed-faq-91, Prev: unnamed-faq-88, Up: FAQ + +unnamed-faq-90 +============== + + To: "Dmitriy Goldobin" <gold@ems.chel.su> + Subject: Re: FLEX trouble + In-reply-to: Your message of Mon, 31 May 1999 18:44:49 PDT. + Date: Tue, 01 Jun 1999 00:15:07 PDT + From: Vern Paxson <vern> + + > I have a trouble with FLEX. Why rule "/*".*"*/" work properly,=20 + > but rule "/*"(.|\n)*"*/" don't work ? + + The second of these will have to scan the entire input stream (because + "(.|\n)*" matches an arbitrary amount of any text) in order to see if + it ends with "*/", terminating the comment. That potentially will overflow + the input buffer. + + > More complex rule "/*"([^*]|(\*/[^/]))*"*/ give an error + > 'unrecognized rule'. + + You can't use the '/' operator inside parentheses. It's not clear + what "(a/b)*" actually means. + + > I now use workaround with state <comment>, but single-rule is + > better, i think. + + Single-rule is nice but will always have the problem of either setting + restrictions on comments (like not allowing multi-line comments) and/or + running the risk of consuming the entire input stream, as noted above. 
+ + Vern + + +File: flex.info, Node: unnamed-faq-91, Next: unnamed-faq-92, Prev: unnamed-faq-90, Up: FAQ + +unnamed-faq-91 +============== + + Received: from mc-qout4.whowhere.com (mc-qout4.whowhere.com [209.185.123.18]) + by ee.lbl.gov (8.9.3/8.9.3) with SMTP id IAA05100 + for <vern@ee.lbl.gov>; Tue, 15 Jun 1999 08:56:06 -0700 (PDT) + Received: from Unknown/Local ([?.?.?.?]) by my-deja.com; Tue Jun 15 08:55:43 1999 + To: vern@ee.lbl.gov + Date: Tue, 15 Jun 1999 08:55:43 -0700 + From: "Aki Niimura" <neko@my-deja.com> + Message-ID: <KNONDOHDOBGAEAAA@my-deja.com> + Mime-Version: 1.0 + Cc: + X-Sent-Mail: on + Reply-To: + X-Mailer: MailCity Service + Subject: A question on flex C++ scanner + X-Sender-Ip: 12.72.207.61 + Organization: My Deja Email (http://www.my-deja.com:80) + Content-Type: text/plain; charset=us-ascii + Content-Transfer-Encoding: 7bit + + Dear Dr. Paxon, + + I have been using flex for years. + It works very well on many projects. + Most case, I used it to generate a scanner on C language. + However, one project I needed to generate a scanner + on C++ lanuage. Thanks to your enhancement, flex did + the job. + + Currently, I'm working on enhancing my previous project. + I need to deal with multiple input streams (recursive + inclusion) in this scanner (C++). + I did similar thing for another scanner (C) as you + explained in your documentation. + + The generated scanner (C++) has necessary methods: + - switch_to_buffer(struct yy_buffer_state *b) + - yy_create_buffer(istream *is, int sz) + - yy_delete_buffer(struct yy_buffer_state *b) + + However, I couldn't figure out how to access current + buffer (yy_current_buffer). + + yy_current_buffer is a protected member of yyFlexLexer. + I can't access it directly. + Then, I thought yy_create_buffer() with is = 0 might + return current stream buffer. But it seems not as far + as I checked the source. (flex 2.5.4) + + I went through the Web in addition to Flex documentation. 
+ However, it hasn't been successful, so far. + + It is not my intention to bother you, but, can you + comment about how to obtain the current stream buffer? + + Your response would be highly appreciated. + + Best regards, + Aki Niimura + + --== Sent via Deja.com http://www.deja.com/ ==-- + Share what you know. Learn what you don't. + + +File: flex.info, Node: unnamed-faq-92, Next: unnamed-faq-93, Prev: unnamed-faq-91, Up: FAQ + +unnamed-faq-92 +============== + + To: neko@my-deja.com + Subject: Re: A question on flex C++ scanner + In-reply-to: Your message of Tue, 15 Jun 1999 08:55:43 PDT. + Date: Tue, 15 Jun 1999 09:04:24 PDT + From: Vern Paxson <vern> + + > However, I couldn't figure out how to access current + > buffer (yy_current_buffer). + + Derive your own subclass from yyFlexLexer. + + Vern + + +File: flex.info, Node: unnamed-faq-93, Next: unnamed-faq-94, Prev: unnamed-faq-92, Up: FAQ + +unnamed-faq-93 +============== + + To: "Stones, Darren" <Darren.Stones@nectech.co.uk> + Subject: Re: You're the man to see? + In-reply-to: Your message of Wed, 23 Jun 1999 11:10:29 PDT. + Date: Wed, 23 Jun 1999 09:01:40 PDT + From: Vern Paxson <vern> + + > I hope you can help me. I am using Flex and Bison to produce an interpreted + > language. However all goes well until I try to implement an IF statement or + > a WHILE. I cannot get this to work as the parser parses all the conditions + > eg. the TRUE and FALSE conditons to check for a rule match. So I cannot + > make a decision!! + + You need to use the parser to build a parse tree (= abstract syntax tree), + and when that's all done you recursively evaluate the tree, binding variables + to values at that time. + + Vern + + +File: flex.info, Node: unnamed-faq-94, Next: unnamed-faq-95, Prev: unnamed-faq-93, Up: FAQ + +unnamed-faq-94 +============== + + To: Petr Danecek <petr@ics.cas.cz> + Subject: Re: flex - question + In-reply-to: Your message of Mon, 28 Jun 1999 19:21:41 PDT. 
+ Date: Fri, 02 Jul 1999 16:52:13 PDT + From: Vern Paxson <vern> + + > file, it takes an enormous amount of time. It is funny, because the + > source code has only 12 rules!!! I think it looks like an exponencial + > growth. + + Right, that's the problem - some patterns (those with a lot of + ambiguity, which yours has because at any given time the scanner can + be in the middle of all sorts of combinations of the different + rules) blow up exponentially. + + For your rules, there is an easy fix. Change the ".*" that comes after + the directory name to "[^ ]*". With that in place, the rules are no + longer nearly so ambiguous, because then once one of the directories + has been matched, no other can be matched (since they all require a + leading blank). + + If that's not an acceptable solution, then you can enter a start state + to pick up the .*\n after each directory is matched. + + Also note that for speed, you'll want to add a ".*" rule at the end, + otherwise lines that don't match any of the patterns will be matched + very slowly, a character at a time. + + Vern + + +File: flex.info, Node: unnamed-faq-95, Next: unnamed-faq-96, Prev: unnamed-faq-94, Up: FAQ + +unnamed-faq-95 +============== + + To: Tielman Koekemoer <tielman@spi.co.za> + Subject: Re: Please help. + In-reply-to: Your message of Thu, 08 Jul 1999 13:20:37 PDT. + Date: Thu, 08 Jul 1999 08:20:39 PDT + From: Vern Paxson <vern> + + > I was hoping you could help me with my problem. + > + > I tried compiling (gnu)flex on a Solaris 2.4 machine + > but when I ran make (after configure) I got an error. + > + > -------------------------------------------------------------- + > gcc -c -I. -I. -g -O parse.c + > ./flex -t -p ./scan.l >scan.c + > sh: ./flex: not found + > *** Error code 1 + > make: Fatal error: Command failed for target `scan.c' + > ------------------------------------------------------------- + > + > What's strange to me is that I'm only + > trying to install flex now. 
I then edited the Makefile to + > and changed where it says "FLEX = flex" to "FLEX = lex" + > ( lex: the native Solaris one ) but then it complains about + > the "-p" option. Is there any way I can compile flex without + > using flex or lex? + > + > Thanks so much for your time. + + You managed to step on the bootstrap sequence, which first copies + initscan.c to scan.c in order to build flex. Try fetching a fresh + distribution from ftp.ee.lbl.gov. (Or you can first try removing + ".bootstrap" and doing a make again.) + + Vern + + +File: flex.info, Node: unnamed-faq-96, Next: unnamed-faq-97, Prev: unnamed-faq-95, Up: FAQ + +unnamed-faq-96 +============== + + To: Tielman Koekemoer <tielman@spi.co.za> + Subject: Re: Please help. + In-reply-to: Your message of Fri, 09 Jul 1999 09:16:14 PDT. + Date: Fri, 09 Jul 1999 00:27:20 PDT + From: Vern Paxson <vern> + + > First I removed .bootstrap (and ran make) - no luck. I downloaded the + > software but I still have the same problem. Is there anything else I + > could try. + + Try: + + cp initscan.c scan.c + touch scan.c + make scan.o + + If this last tries to first build scan.c from scan.l using ./flex, then + your "make" is broken, in which case compile scan.c to scan.o by hand. + + Vern + + +File: flex.info, Node: unnamed-faq-97, Next: unnamed-faq-98, Prev: unnamed-faq-96, Up: FAQ + +unnamed-faq-97 +============== + + To: Sumanth Kamenani <skamenan@crl.nmsu.edu> + Subject: Re: Error + In-reply-to: Your message of Mon, 19 Jul 1999 23:08:41 PDT. + Date: Tue, 20 Jul 1999 00:18:26 PDT + From: Vern Paxson <vern> + + > I am getting a compilation error. The error is given as "unknown symbol- yylex". + + The parser relies on calling yylex(), but you're instead using the C++ scanning + class, so you need to supply a yylex() "glue" function that calls yylex() on an + instance of the scanner class (e.g., "scanner->yylex()"). 
+ + Vern + + +File: flex.info, Node: unnamed-faq-98, Next: unnamed-faq-99, Prev: unnamed-faq-97, Up: FAQ + +unnamed-faq-98 +============== + + To: daniel@synchrods.synchrods.COM (Daniel Senderowicz) + Subject: Re: lex + In-reply-to: Your message of Mon, 22 Nov 1999 11:19:04 PST. + Date: Tue, 23 Nov 1999 15:54:30 PST + From: Vern Paxson <vern> + + Well, your problem is the + + switch (yybgin-yysvec-1) { /* witchcraft */ + + at the beginning of lex rules. "witchcraft" == "non-portable". It's + assuming knowledge of the AT&T lex's internal variables. + + For flex, you can probably do the equivalent using a switch on YYSTATE. + + Vern + + +File: flex.info, Node: unnamed-faq-99, Next: unnamed-faq-100, Prev: unnamed-faq-98, Up: FAQ + +unnamed-faq-99 +============== + + To: archow@hss.hns.com + Subject: Re: Regarding distribution of flex and yacc based grammars + In-reply-to: Your message of Sun, 19 Dec 1999 17:50:24 +0530. + Date: Wed, 22 Dec 1999 01:56:24 PST + From: Vern Paxson <vern> + + > When we provide the customer with an object code distribution, is it + > necessary for us to provide source + > for the generated C files from flex and bison since they are generated by + > flex and bison ? + + For flex, no. I don't know what the current state of this is for bison. + + > Also, is there any requrirement for us to neccessarily provide source for + > the grammar files which are fed into flex and bison ? + + Again, for flex, no. + + See the file "COPYING" in the flex distribution for the legalese. + + Vern + + +File: flex.info, Node: unnamed-faq-100, Next: unnamed-faq-101, Prev: unnamed-faq-99, Up: FAQ + +unnamed-faq-100 +=============== + + To: Martin Gallwey <gallweym@hyperion.moe.ul.ie> + Subject: Re: Flex, and self referencing rules + In-reply-to: Your message of Sun, 20 Feb 2000 01:01:21 PST. + Date: Sat, 19 Feb 2000 18:33:16 PST + From: Vern Paxson <vern> + + > However, I do not use unput anywhere. 
I do use self-referencing + > rules like this: + > + > UnaryExpr ({UnionExpr})|("-"{UnaryExpr}) + + You can't do this - flex is *not* a parser like yacc (which does indeed + allow recursion), it is a scanner that's confined to regular expressions. + + Vern + + +File: flex.info, Node: unnamed-faq-101, Next: What is the difference between YYLEX_PARAM and YY_DECL?, Prev: unnamed-faq-100, Up: FAQ + +unnamed-faq-101 +=============== + + To: slg3@lehigh.edu (SAMUEL L. GULDEN) + Subject: Re: Flex problem + In-reply-to: Your message of Thu, 02 Mar 2000 12:29:04 PST. + Date: Thu, 02 Mar 2000 23:00:46 PST + From: Vern Paxson <vern> + + If this is exactly your program: + + > digit [0-9] + > digits {digit}+ + > whitespace [ \t\n]+ + > + > %% + > "[" { printf("open_brac\n");} + > "]" { printf("close_brac\n");} + > "+" { printf("addop\n");} + > "*" { printf("multop\n");} + > {digits} { printf("NUMBER = %s\n", yytext);} + > whitespace ; + + then the problem is that the last rule needs to be "{whitespace}" ! + + Vern + + +File: flex.info, Node: What is the difference between YYLEX_PARAM and YY_DECL?, Next: Why do I get "conflicting types for yylex" error?, Prev: unnamed-faq-101, Up: FAQ + +What is the difference between YYLEX_PARAM and YY_DECL? +======================================================= + +YYLEX_PARAM is not a flex symbol. It is for Bison. It tells Bison to +pass extra params when it calls yylex() from the parser. + + YY_DECL is the Flex declaration of yylex. The default is similar to +this: + + #define YY_DECL int yylex () + + +File: flex.info, Node: Why do I get "conflicting types for yylex" error?, Next: How do I access the values set in a Flex action from within a Bison action?, Prev: What is the difference between YYLEX_PARAM and YY_DECL?, Up: FAQ + +Why do I get "conflicting types for yylex" error? +================================================= + +This is a compiler error regarding a generated Bison parser, not a Flex +scanner. 
It means you need a prototype of yylex() at the top of the +Bison file. Be sure the prototype matches YY_DECL. + + +File: flex.info, Node: How do I access the values set in a Flex action from within a Bison action?, Prev: Why do I get "conflicting types for yylex" error?, Up: FAQ + +How do I access the values set in a Flex action from within a Bison action? +=========================================================================== + +With $1, $2, $3, etc. These are called "Semantic Values" in the Bison +manual. See *note (bison)Top::. + + +File: flex.info, Node: Appendices, Next: Indices, Prev: FAQ, Up: Top + +Appendix A Appendices +********************* + +* Menu: + +* Makefiles and Flex:: +* Bison Bridge:: +* M4 Dependency:: +* Common Patterns:: + + +File: flex.info, Node: Makefiles and Flex, Next: Bison Bridge, Prev: Appendices, Up: Appendices + +A.1 Makefiles and Flex +====================== + +In this appendix, we provide tips for writing Makefiles to build your +scanners. + + In a traditional build environment, we say that the '.c' files are +the sources, and the '.o' files are the intermediate files. When using +'flex', however, the '.l' files are the sources, and the generated '.c' +files (along with the '.o' files) are the intermediate files. This +requires you to carefully plan your Makefile. + + Modern 'make' programs understand that 'foo.l' is intended to +generate 'lex.yy.c' or 'foo.c', and will behave accordingly(1)(2). The +following Makefile does not explicitly instruct 'make' how to build +'scan.c' from 'scan.l'. Instead, it relies on the implicit rules of the +'make' program to build the intermediate file, 'scan.c': + + # Basic Makefile -- relies on implicit rules + # Creates "myprogram" from "scan.l" and "myprogram.c" + # + LEX=flex + myprogram: scan.o myprogram.o + scan.o: scan.l + + + For simple cases, the above may be sufficient. For other cases, you +may have to explicitly instruct 'make' how to build your scanner. 
The +following is an example of a Makefile containing explicit rules: + + # Basic Makefile -- provides explicit rules + # Creates "myprogram" from "scan.l" and "myprogram.c" + # + LEX=flex + myprogram: scan.o myprogram.o + $(CC) -o $@ $(LDFLAGS) $^ + + myprogram.o: myprogram.c + $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $^ + + scan.o: scan.c + $(CC) $(CPPFLAGS) $(CFLAGS) -o $@ -c $^ + + scan.c: scan.l + $(LEX) $(LFLAGS) -o $@ $^ + + clean: + $(RM) *.o scan.c + + + Notice in the above example that 'scan.c' is in the 'clean' target. +This is because we consider the file 'scan.c' to be an intermediate +file. + + Finally, we provide a realistic example of a 'flex' scanner used with +a 'bison' parser(3). There is a tricky problem we have to deal with. +Since a 'flex' scanner will typically include a header file (e.g., +'y.tab.h') generated by the parser, we need to be sure that the header +file is generated BEFORE the scanner is compiled. We handle this case +in the following example: + + # Makefile example -- scanner and parser. + # Creates "myprogram" from "scan.l", "parse.y", and "myprogram.c" + # + LEX = flex + YACC = bison -y + YFLAGS = -d + objects = scan.o parse.o myprogram.o + + myprogram: $(objects) + scan.o: scan.l parse.c + parse.o: parse.y + myprogram.o: myprogram.c + + + In the above example, notice the line, + + scan.o: scan.l parse.c + + , which lists the file 'parse.c' (the generated parser) as a +dependency of 'scan.o'. We want to ensure that the parser is created +before the scanner is compiled, and the above line seems to do the +trick. Feel free to experiment with your specific implementation of +'make'. + + For more details on writing Makefiles, see *note (make)Top::. + + ---------- Footnotes ---------- + + (1) GNU 'make' and GNU 'automake' are two such programs that provide +implicit rules for flex-generated scanners. + + (2) GNU 'automake' may generate code to execute flex in +lex-compatible mode, or to stdout. 
If this is not what you want, then +you should provide an explicit rule in your Makefile.am. + + (3) This example also applies to yacc parsers. + + +File: flex.info, Node: Bison Bridge, Next: M4 Dependency, Prev: Makefiles and Flex, Up: Appendices + +A.2 C Scanners with Bison Parsers +================================= + +This section describes the 'flex' features useful when integrating +'flex' with 'GNU bison'(1). Skip this section if you are not using +'bison' with your scanner. Here we discuss only the 'flex' half of the +'flex' and 'bison' pair. We do not discuss 'bison' in any detail. For +more information about generating 'bison' parsers, see *note +(bison)Top::. + + A 'bison'-compatible scanner is generated by declaring '%option +bison-bridge' or by supplying '--bison-bridge' when invoking 'flex' from +the command line. This instructs 'flex' that the macro 'yylval' may be +used. The data type for 'yylval', 'YYSTYPE', is typically defined in a +header file, included in section 1 of the 'flex' input file. For a list +of functions and macros available, *Note bison-functions::. + + The declaration of yylex becomes, + + int yylex ( YYSTYPE * lvalp, yyscan_t scanner ); + + If '%option bison-locations' is specified, then the declaration +becomes, + + int yylex ( YYSTYPE * lvalp, YYLTYPE * llocp, yyscan_t scanner ); + + Note that the macros 'yylval' and 'yylloc' evaluate to pointers. +Support for 'yylloc' is optional in 'bison', so it is optional in 'flex' +as well. The following is an example of a 'flex' scanner that is +compatible with 'bison'. + + /* Scanner for "C" assignment statements... sort of. */ + %{ + #include "y.tab.h" /* Generated by bison. */ + %} + + %option bison-bridge bison-locations + %% + + [[:digit:]]+ { yylval->num = atoi(yytext); return NUMBER;} + [[:alnum:]]+ { yylval->str = strdup(yytext); return STRING;} + "="|";" { return yytext[0];} + . {} + %% + + As you can see, there really is no magic here. 
We just use 'yylval' +as we would any other variable. The data type of 'yylval' is generated +by 'bison', and included in the file 'y.tab.h'. Here is the +corresponding 'bison' parser: + + /* Parser to convert "C" assignments to lisp. */ + %{ + /* Pass the argument to yyparse through to yylex. */ + #define YYPARSE_PARAM scanner + #define YYLEX_PARAM scanner + %} + %locations + %pure_parser + %union { + int num; + char* str; + } + %token <str> STRING + %token <num> NUMBER + %% + assignment: + STRING '=' NUMBER ';' { + printf( "(setf %s %d)", $1, $3 ); + } + ; + + ---------- Footnotes ---------- + + (1) The features described here are purely optional, and are by no +means the only way to use flex with bison. We merely provide some glue +to ease development of your parser-scanner pair. + + +File: flex.info, Node: M4 Dependency, Next: Common Patterns, Prev: Bison Bridge, Up: Appendices + +A.3 M4 Dependency +================= + +The macro processor 'm4'(1) must be installed wherever flex is +installed. 'flex' invokes 'm4', found by searching the directories in +the 'PATH' environment variable. Any code you place in section 1 or in +the actions will be sent through m4. Please follow these rules to +protect your code from unwanted 'm4' processing. + + * Do not use symbols that begin with, 'm4_', such as, 'm4_define', or + 'm4_include', since those are reserved for 'm4' macro names. If + for some reason you need m4_ as a prefix, use a preprocessor + #define to get your symbol past m4 unmangled. + + * Do not use the strings '[[' or ']]' anywhere in your code. The + former is not valid in C, except within comments and strings, but + the latter is valid in code such as 'x[y[z]]'. The solution is + simple. To get the literal string '"]]"', use '"]""]"'. To get + the array notation 'x[y[z]]', use 'x[y[z] ]'. Flex will attempt to + detect these sequences in user code, and escape them. 
However, + it's best to avoid this complexity where possible, by removing such + sequences from your code. + + 'm4' is only required at the time you run 'flex'. The generated +scanner is ordinary C or C++, and does _not_ require 'm4'. + + ---------- Footnotes ---------- + + (1) The use of m4 is subject to change in future revisions of flex. +It is not part of the public API of flex. Do not depend on it. + + +File: flex.info, Node: Common Patterns, Prev: M4 Dependency, Up: Appendices + +A.4 Common Patterns +=================== + +This appendix provides examples of common regular expressions you might +use in your scanner. + +* Menu: + +* Numbers:: +* Identifiers:: +* Quoted Constructs:: +* Addresses:: + + +File: flex.info, Node: Numbers, Next: Identifiers, Up: Common Patterns + +A.4.1 Numbers +------------- + +C99 decimal constant + '([[:digit:]]{-}[0])[[:digit:]]*' + +C99 hexadecimal constant + '0[xX][[:xdigit:]]+' + +C99 octal constant + '0[01234567]*' + +C99 floating point constant + {dseq} ([[:digit:]]+) + {dseq_opt} ([[:digit:]]*) + {frac} (({dseq_opt}"."{dseq})|{dseq}".") + {exp} ([eE][+-]?{dseq}) + {exp_opt} ({exp}?) + {fsuff} [flFL] + {fsuff_opt} ({fsuff}?) + {hpref} (0[xX]) + {hdseq} ([[:xdigit:]]+) + {hdseq_opt} ([[:xdigit:]]*) + {hfrac} (({hdseq_opt}"."{hdseq})|({hdseq}".")) + {bexp} ([pP][+-]?{dseq}) + {dfc} (({frac}{exp_opt}{fsuff_opt})|({dseq}{exp}{fsuff_opt})) + {hfc} (({hpref}{hfrac}{bexp}{fsuff_opt})|({hpref}{hdseq}{bexp}{fsuff_opt})) + + {c99_floating_point_constant} ({dfc}|{hfc}) + + See C99 section 6.4.4.2 for the gory details. 
+ + +File: flex.info, Node: Identifiers, Next: Quoted Constructs, Prev: Numbers, Up: Common Patterns + +A.4.2 Identifiers +----------------- + +C99 Identifier + ucn ((\\u([[:xdigit:]]{4}))|(\\U([[:xdigit:]]{8}))) + nondigit [_[:alpha:]] + c99_id ([_[:alpha:]]|{ucn})([_[:alnum:]]|{ucn})* + + Technically, the above pattern does not encompass all possible C99 + identifiers, since C99 allows for "implementation-defined" + characters. In practice, C compilers follow the above pattern, + with the addition of the '$' character. + +UTF-8 Encoded Unicode Code Point + [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2}) + + +File: flex.info, Node: Quoted Constructs, Next: Addresses, Prev: Identifiers, Up: Common Patterns + +A.4.3 Quoted Constructs +----------------------- + +C99 String Literal + 'L?\"([^\"\\\n]|(\\['\"?\\abfnrtv])|(\\([01234567]{1,3}))|(\\x[[:xdigit:]]+)|(\\u([[:xdigit:]]{4}))|(\\U([[:xdigit:]]{8})))*\"' + +C99 Comment + '("/*"([^*]|"*"+[^*/])*"*"+"/")|("/"(\\\n)*"/"[^\n]*)' + + Note that in C99, a '//'-style comment may be split across lines, + and, contrary to popular belief, does not include the trailing '\n' + character. + + A better way to scan '/* */' comments is by line, rather than + matching possibly huge comments all at once. This will allow you + to scan comments of unlimited length, as long as line breaks appear + at sane intervals. This is also more efficient when used with + automatic line number processing. *Note option-yylineno::. 
+ + <INITIAL>{ + "/*" BEGIN(COMMENT); + } + <COMMENT>{ + "*/" BEGIN(0); + [^*\n]+ ; + "*"[^/] ; + \n ; + } + + +File: flex.info, Node: Addresses, Prev: Quoted Constructs, Up: Common Patterns + +A.4.4 Addresses +--------------- + +IPv4 Address + dec-octet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5] + IPv4address {dec-octet}\.{dec-octet}\.{dec-octet}\.{dec-octet} + +IPv6 Address + h16 [0-9A-Fa-f]{1,4} + ls32 {h16}:{h16}|{IPv4address} + IPv6address ({h16}:){6}{ls32}| + ::({h16}:){5}{ls32}| + ({h16})?::({h16}:){4}{ls32}| + (({h16}:){0,1}{h16})?::({h16}:){3}{ls32}| + (({h16}:){0,2}{h16})?::({h16}:){2}{ls32}| + (({h16}:){0,3}{h16})?::{h16}:{ls32}| + (({h16}:){0,4}{h16})?::{ls32}| + (({h16}:){0,5}{h16})?::{h16}| + (({h16}:){0,6}{h16})?:: + + See RFC 2373 (http://www.ietf.org/rfc/rfc2373.txt) for details. + Note that you have to fold the definition of 'IPv6address' into one + line and that it also matches the "unspecified address" "::". + +URI + '(([^:/?#]+):)?("//"([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' + + This pattern is nearly useless, since it allows just about any + character to appear in a URI, including spaces and control + characters. See RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt) for + details. + + +File: flex.info, Node: Indices, Prev: Appendices, Up: Top + +Indices +******* + +* Menu: + +* Concept Index:: +* Index of Functions and Macros:: +* Index of Variables:: +* Index of Data Types:: +* Index of Hooks:: +* Index of Scanner Options:: + + +File: flex.info, Node: Concept Index, Next: Index of Functions and Macros, Prev: Indices, Up: Indices + +Concept Index +============= + + +* Menu: + +* $ as normal character in patterns: Patterns. (line 275) +* %array, advantages of: Matching. (line 43) +* %array, use of: Matching. (line 29) +* %array, with C++: Matching. (line 65) +* %option noyywrapp: Generated Scanner. (line 93) +* %pointer, and unput(): Actions. (line 162) +* %pointer, use of: Matching. (line 29) +* %top: Definitions Section. 
(line 44) +* %{ and %}, in Definitions Section: Definitions Section. (line 40) +* %{ and %}, in Rules Section: Actions. (line 26) +* <<EOF>>, use of: EOF. (line 33) +* [] in patterns: Patterns. (line 15) +* ^ as non-special character in patterns: Patterns. (line 275) +* |, in actions: Actions. (line 33) +* |, use of: Actions. (line 83) +* accessor functions, use of: Accessor Methods. (line 18) +* actions: Actions. (line 6) +* actions, embedded C strings: Actions. (line 26) +* actions, redefining YY_BREAK: Misc Macros. (line 49) +* actions, use of { and }: Actions. (line 26) +* aliases, how to define: Definitions Section. (line 10) +* arguments, command-line: Scanner Options. (line 6) +* array, default size for yytext: User Values. (line 13) +* backing up, eliminating: Performance. (line 54) +* backing up, eliminating by adding error rules: Performance. (line 104) +* backing up, eliminating with catch-all rule: Performance. (line 118) +* backing up, example of eliminating: Performance. (line 49) +* BEGIN: Actions. (line 57) +* BEGIN, explanation: Start Conditions. (line 84) +* beginning of line, in patterns: Patterns. (line 127) +* bison, bridging with flex: Bison Bridge. (line 6) +* bison, parser: Bison Bridge. (line 53) +* bison, scanner to be called from bison: Bison Bridge. (line 34) +* BOL, checking the BOL flag: Misc Macros. (line 46) +* BOL, in patterns: Patterns. (line 127) +* BOL, setting it: Misc Macros. (line 40) +* braces in patterns: Patterns. (line 42) +* bugs, reporting: Reporting Bugs. (line 6) +* C code in flex input: Definitions Section. (line 40) +* C++: Cxx. (line 9) +* C++ and %array: User Values. (line 23) +* C++ I/O, customizing: How do I use my own I/O classes in a C++ scanner?. + (line 9) +* C++ scanners, including multiple scanners: Cxx. (line 197) +* C++ scanners, use of: Cxx. (line 128) +* c++, experimental form of scanner class: Cxx. (line 6) +* C++, multiple different scanners: Cxx. (line 192) +* C-strings, in actions: Actions. 
(line 26) +* case-insensitive, effect on character classes: Patterns. (line 216) +* character classes in patterns: Patterns. (line 186) +* character classes in patterns, syntax of: Patterns. (line 15) +* character classes, equivalence of: Patterns. (line 205) +* clearing an input buffer: Multiple Input Buffers. + (line 66) +* command-line options: Scanner Options. (line 6) +* comments in flex input: Definitions Section. (line 37) +* comments in the input: Comments in the Input. + (line 24) +* comments, discarding: Actions. (line 176) +* comments, example of scanning C comments: Start Conditions. (line 140) +* comments, in actions: Actions. (line 26) +* comments, in rules section: Comments in the Input. + (line 11) +* comments, syntax of: Comments in the Input. + (line 6) +* comments, valid uses of: Comments in the Input. + (line 24) +* compressing whitespace: Actions. (line 22) +* concatenation, in patterns: Patterns. (line 111) +* copyright of flex: Copyright. (line 6) +* counting characters and lines: Simple Examples. (line 23) +* customizing I/O in C++ scanners: How do I use my own I/O classes in a C++ scanner?. + (line 9) +* default rule: Simple Examples. (line 15) +* default rule <1>: Matching. (line 20) +* defining pattern aliases: Definitions Section. (line 21) +* Definitions, in flex input: Definitions Section. (line 6) +* deleting lines from input: Actions. (line 13) +* discarding C comments: Actions. (line 176) +* distributing flex: Copyright. (line 6) +* ECHO: Actions. (line 54) +* ECHO, and yyout: Generated Scanner. (line 101) +* embedding C code in flex input: Definitions Section. (line 40) +* end of file, in patterns: Patterns. (line 150) +* end of line, in negated character classes: Patterns. (line 237) +* end of line, in patterns: Patterns. (line 131) +* end-of-file, and yyrestart(): Generated Scanner. (line 42) +* EOF and yyrestart(): Generated Scanner. (line 42) +* EOF in patterns, syntax of: Patterns. 
(line 150) +* EOF, example using multiple input buffers: Multiple Input Buffers. + (line 81) +* EOF, explanation: EOF. (line 6) +* EOF, pushing back: Actions. (line 170) +* EOL, in negated character classes: Patterns. (line 237) +* EOL, in patterns: Patterns. (line 131) +* error messages, end of buffer missed: Lex and Posix. (line 50) +* error reporting, diagnostic messages: Diagnostics. (line 6) +* error reporting, in C++: Cxx. (line 112) +* error rules, to eliminate backing up: Performance. (line 102) +* escape sequences in patterns, syntax of: Patterns. (line 57) +* exiting with yyterminate(): Actions. (line 212) +* experimental form of c++ scanner class: Cxx. (line 6) +* extended scope of start conditions: Start Conditions. (line 270) +* file format: Format. (line 6) +* file format, serialized tables: Tables File Format. (line 6) +* flushing an input buffer: Multiple Input Buffers. + (line 66) +* flushing the internal buffer: Actions. (line 206) +* format of flex input: Format. (line 6) +* format of input file: Format. (line 9) +* freeing tables: Loading and Unloading Serialized Tables. + (line 6) +* getting current start state with YY_START: Start Conditions. + (line 189) +* halting with yyterminate(): Actions. (line 212) +* handling include files with multiple input buffers: Multiple Input Buffers. + (line 87) +* handling include files with multiple input buffers <1>: Multiple Input Buffers. + (line 122) +* header files, with C++: Cxx. (line 197) +* include files, with C++: Cxx. (line 197) +* input file, Definitions section: Definitions Section. (line 6) +* input file, Rules Section: Rules Section. (line 6) +* input file, user code Section: User Code Section. (line 6) +* input(): Actions. (line 173) +* input(), and C++: Actions. (line 202) +* input, format of: Format. (line 6) +* input, matching: Matching. (line 6) +* keywords, for performance: Performance. (line 200) +* lex (traditional) and POSIX: Lex and Posix. 
(line 6) +* LexerInput, overriding: How do I use my own I/O classes in a C++ scanner?. + (line 9) +* LexerOutput, overriding: How do I use my own I/O classes in a C++ scanner?. + (line 9) +* limitations of flex: Limitations. (line 6) +* literal text in patterns, syntax of: Patterns. (line 54) +* loading tables at runtime: Loading and Unloading Serialized Tables. + (line 6) +* m4: M4 Dependency. (line 6) +* Makefile, example of implicit rules: Makefiles and Flex. (line 21) +* Makefile, explicit example: Makefiles and Flex. (line 33) +* Makefile, syntax: Makefiles and Flex. (line 6) +* matching C-style double-quoted strings: Start Conditions. (line 203) +* matching, and trailing context: Matching. (line 6) +* matching, length of: Matching. (line 6) +* matching, multiple matches: Matching. (line 6) +* member functions, C++: Cxx. (line 9) +* memory management: Memory Management. (line 6) +* memory, allocating input buffers: Multiple Input Buffers. + (line 19) +* memory, considerations for reentrant scanners: Init and Destroy Functions. + (line 6) +* memory, deleting input buffers: Multiple Input Buffers. + (line 46) +* memory, for start condition stacks: Start Conditions. (line 301) +* memory, serialized tables: Serialized Tables. (line 6) +* memory, serialized tables <1>: Loading and Unloading Serialized Tables. + (line 6) +* methods, c++: Cxx. (line 9) +* minimal scanner: Matching. (line 24) +* multiple input streams: Multiple Input Buffers. + (line 6) +* name definitions, not POSIX: Lex and Posix. (line 75) +* negating ranges in patterns: Patterns. (line 23) +* newline, matching in patterns: Patterns. (line 135) +* non-POSIX features of flex: Lex and Posix. (line 142) +* noyywrap, %option: Generated Scanner. (line 93) +* NULL character in patterns, syntax of: Patterns. (line 62) +* octal characters in patterns: Patterns. (line 65) +* options, command-line: Scanner Options. (line 6) +* overriding LexerInput: How do I use my own I/O classes in a C++ scanner?. 
+ (line 9) +* overriding LexerOutput: How do I use my own I/O classes in a C++ scanner?. + (line 9) +* overriding the memory routines: Overriding The Default Memory Management. + (line 38) +* Pascal-like language: Simple Examples. (line 49) +* pattern aliases, defining: Definitions Section. (line 21) +* pattern aliases, expansion of: Patterns. (line 51) +* pattern aliases, how to define: Definitions Section. (line 10) +* pattern aliases, use of: Definitions Section. (line 28) +* patterns and actions on different lines: Lex and Posix. (line 101) +* patterns, character class equivalence: Patterns. (line 205) +* patterns, common: Common Patterns. (line 6) +* patterns, end of line: Patterns. (line 300) +* patterns, grouping and precedence: Patterns. (line 167) +* patterns, in rules section: Patterns. (line 6) +* patterns, invalid trailing context: Patterns. (line 285) +* patterns, matching: Matching. (line 6) +* patterns, precedence of operators: Patterns. (line 161) +* patterns, repetitions with grouping: Patterns. (line 184) +* patterns, special characters treated as non-special: Patterns. + (line 293) +* patterns, syntax: Patterns. (line 9) +* patterns, syntax <1>: Patterns. (line 9) +* patterns, tuning for performance: Performance. (line 49) +* patterns, valid character classes: Patterns. (line 192) +* performance optimization, matching longer tokens: Performance. + (line 167) +* performance optimization, recognizing keywords: Performance. + (line 205) +* performance, backing up: Performance. (line 49) +* performance, considerations: Performance. (line 6) +* performance, using keywords: Performance. (line 200) +* popping an input buffer: Multiple Input Buffers. + (line 60) +* POSIX and lex: Lex and Posix. (line 6) +* POSIX comp;compliance: Lex and Posix. (line 142) +* POSIX, character classes in patterns, syntax of: Patterns. (line 15) +* preprocessor macros, for use in actions: Actions. (line 50) +* pushing an input buffer: Multiple Input Buffers. 
+ (line 52) +* pushing back characters with unput: Actions. (line 143) +* pushing back characters with unput(): Actions. (line 147) +* pushing back characters with yyless: Actions. (line 131) +* pushing back EOF: Actions. (line 170) +* ranges in patterns: Patterns. (line 19) +* ranges in patterns, negating: Patterns. (line 23) +* recognizing C comments: Start Conditions. (line 143) +* reentrant scanners, multiple interleaved scanners: Reentrant Uses. + (line 10) +* reentrant scanners, recursive invocation: Reentrant Uses. (line 30) +* reentrant, accessing flex variables: Global Replacement. (line 6) +* reentrant, accessor functions: Accessor Methods. (line 6) +* reentrant, API explanation: Reentrant Overview. (line 6) +* reentrant, calling functions: Extra Reentrant Argument. + (line 6) +* reentrant, example of: Reentrant Example. (line 6) +* reentrant, explanation: Reentrant. (line 6) +* reentrant, extra data: Extra Data. (line 6) +* reentrant, initialization: Init and Destroy Functions. + (line 6) +* regular expressions, in patterns: Patterns. (line 6) +* REJECT: Actions. (line 61) +* REJECT, calling multiple times: Actions. (line 83) +* REJECT, performance costs: Performance. (line 12) +* reporting bugs: Reporting Bugs. (line 6) +* restarting the scanner: Lex and Posix. (line 54) +* RETURN, within actions: Generated Scanner. (line 57) +* rules, default: Simple Examples. (line 15) +* rules, in flex input: Rules Section. (line 6) +* scanner, definition of: Introduction. (line 6) +* sections of flex input: Format. (line 6) +* serialization: Serialized Tables. (line 6) +* serialization of tables: Creating Serialized Tables. + (line 6) +* serialized tables, multiple scanners: Creating Serialized Tables. + (line 26) +* stack, input buffer pop: Multiple Input Buffers. + (line 60) +* stack, input buffer push: Multiple Input Buffers. + (line 52) +* stacks, routines for manipulating: Start Conditions. 
(line 286) +* start condition, applying to multiple patterns: Start Conditions. + (line 258) +* start conditions: Start Conditions. (line 6) +* start conditions, behavior of default rule: Start Conditions. + (line 82) +* start conditions, exclusive: Start Conditions. (line 53) +* start conditions, for different interpretations of same input: Start Conditions. + (line 112) +* start conditions, in patterns: Patterns. (line 140) +* start conditions, inclusive: Start Conditions. (line 44) +* start conditions, inclusive v.s. exclusive: Start Conditions. + (line 24) +* start conditions, integer values: Start Conditions. (line 163) +* start conditions, multiple: Start Conditions. (line 17) +* start conditions, special wildcard condition: Start Conditions. + (line 68) +* start conditions, use of a stack: Start Conditions. (line 286) +* start conditions, use of wildcard condition (<*>): Start Conditions. + (line 72) +* start conditions, using BEGIN: Start Conditions. (line 95) +* stdin, default for yyin: Generated Scanner. (line 37) +* stdout, as default for yyout: Generated Scanner. (line 101) +* strings, scanning strings instead of files: Multiple Input Buffers. + (line 175) +* tables, creating serialized: Creating Serialized Tables. + (line 6) +* tables, file format: Tables File Format. (line 6) +* tables, freeing: Loading and Unloading Serialized Tables. + (line 6) +* tables, loading and unloading: Loading and Unloading Serialized Tables. + (line 6) +* terminating with yyterminate(): Actions. (line 212) +* token: Matching. (line 14) +* trailing context, in patterns: Patterns. (line 118) +* trailing context, limits of: Patterns. (line 275) +* trailing context, matching: Matching. (line 6) +* trailing context, performance costs: Performance. (line 12) +* trailing context, variable length: Performance. (line 141) +* unput(): Actions. (line 143) +* unput(), and %pointer: Actions. (line 162) +* unput(), pushing back characters: Actions. 
(line 147) +* user code, in flex input: User Code Section. (line 6) +* username expansion: Simple Examples. (line 8) +* using integer values of start condition names: Start Conditions. + (line 163) +* verbatim text in patterns, syntax of: Patterns. (line 54) +* warning, dangerous trailing context: Limitations. (line 20) +* warning, rule cannot be matched: Diagnostics. (line 14) +* warnings, diagnostic messages: Diagnostics. (line 6) +* whitespace, compressing: Actions. (line 22) +* yacc interface: Yacc. (line 17) +* yacc, interface: Yacc. (line 6) +* yyalloc, overriding: Overriding The Default Memory Management. + (line 6) +* yyfree, overriding: Overriding The Default Memory Management. + (line 6) +* yyin: Generated Scanner. (line 37) +* yyinput(): Actions. (line 202) +* yyleng: Matching. (line 14) +* yyleng, modification of: Actions. (line 47) +* yyless(): Actions. (line 125) +* yyless(), pushing back characters: Actions. (line 131) +* yylex(), in generated scanner: Generated Scanner. (line 6) +* yylex(), overriding: Generated Scanner. (line 16) +* yylex, overriding the prototype of: Generated Scanner. (line 20) +* yylineno, in a reentrant scanner: Reentrant Functions. (line 36) +* yylineno, performance costs: Performance. (line 12) +* yymore(): Actions. (line 104) +* yymore() to append token to previous token: Actions. (line 110) +* yymore(), mega-kludge: Actions. (line 110) +* yymore, and yyleng: Actions. (line 47) +* yymore, performance penalty of: Actions. (line 119) +* yyout: Generated Scanner. (line 101) +* yyrealloc, overriding: Overriding The Default Memory Management. + (line 6) +* yyrestart(): Generated Scanner. (line 42) +* yyterminate(): Actions. (line 212) +* yytext: Matching. (line 14) +* yytext, default array size: User Values. (line 13) +* yytext, memory considerations: A Note About yytext And Memory. + (line 6) +* yytext, modification of: Actions. (line 42) +* yytext, two types of: Matching. (line 29) +* yywrap(): Generated Scanner. 
(line 85) +* yywrap, default for: Generated Scanner. (line 93) +* YY_CURRENT_BUFFER, and multiple buffers: Multiple Input Buffers. + (line 78) +* YY_EXTRA_TYPE, defining your own type: Extra Data. (line 33) +* YY_FLUSH_BUFFER: Actions. (line 206) +* YY_INPUT: Generated Scanner. (line 61) +* YY_INPUT, overriding: Generated Scanner. (line 71) +* YY_START, example: Start Conditions. (line 185) +* YY_USER_ACTION to track each time a rule is matched: Misc Macros. + (line 14) + diff --git a/doc/flex.info-2 b/doc/flex.info-2 Binary files differ new file mode 100644 index 0000000..32356d6 --- /dev/null +++ b/doc/flex.info-2 diff --git a/doc/stamp-vti b/doc/stamp-vti new file mode 100644 index 0000000..fa020ed --- /dev/null +++ b/doc/stamp-vti @@ -0,0 +1,4 @@ +@set UPDATED 6 May 2017 +@set UPDATED-MONTH May 2017 +@set EDITION 2.6.4 +@set VERSION 2.6.4 diff --git a/doc/version.texi b/doc/version.texi new file mode 100644 index 0000000..fa020ed --- /dev/null +++ b/doc/version.texi @@ -0,0 +1,4 @@ +@set UPDATED 6 May 2017 +@set UPDATED-MONTH May 2017 +@set EDITION 2.6.4 +@set VERSION 2.6.4 |