summaryrefslogtreecommitdiff
path: root/fromcheck.nfa
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2016-08-18 07:57:33 -0700
committerSean Whitton <spwhitton@spwhitton.name>2016-08-18 07:57:33 -0700
commita48a7d6bba5d8030ef2aba8419fbabf55f7831ba (patch)
tree87fed0fce7a199a927b2cb7b1ddeeebe94350b00 /fromcheck.nfa
Imported Upstream version 0.23+git20131125
Diffstat (limited to 'fromcheck.nfa')
-rw-r--r--fromcheck.nfa218
1 files changed, 218 insertions, 0 deletions
diff --git a/fromcheck.nfa b/fromcheck.nfa
new file mode 100644
index 0000000..40f3996
--- /dev/null
+++ b/fromcheck.nfa
@@ -0,0 +1,218 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+# Copyright (C) Jonathan Kamens 2010
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+%{
+#include "from.h"
+%}
+
+
+# Define tokens
+# CR : \n
+# DIGIT : [0-9]
+# AT : @
+# COLON : :
+# WHITE : ' ', \t
+# LOWER : [a-z]
+# UPPER : [A-Z]
+# PLUSMINUS : [+-]
+# OTHER_EMAIL : other stuff valid in the LHS of an address
+# DOMAIN : stuff valid in the RHS of an address
+
+Abbrev LF = [\n]
+Abbrev CR = [\r]
+Abbrev DIGIT = [0-9]
+Abbrev PERIOD = [.]
+Abbrev AT = [@]
+Abbrev LOWER = [a-z]
+Abbrev UPPER = [A-Z]
+Abbrev COLON = [:]
+Abbrev WHITE = [ \t]
+Abbrev PLUSMINUS = [+\-]
+# Explained clearly at
+# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
+Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
+Abbrev LT = [<]
+Abbrev GT = [>]
+Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
+Abbrev OTHER_DOMAIN = [\-_.]
+Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN
+Abbrev DQUOTE = ["]
+Abbrev OTHER_QUOTED = [@:<>]
+Abbrev LEFTSQUARE = [[]
+Abbrev RIGHTSQUARE = [\]]
+
+BLOCK email {
+ STATE in
+ EMAIL -> in, before_at
+ DQUOTE -> quoted_before_at
+ AT -> domain_route
+
+ STATE domain_route
+ DOMAIN -> domain_route
+ COLON -> in
+
+ STATE quoted_before_at
+ EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
+ DQUOTE -> before_at
+
+ STATE before_at
+ EMAIL -> before_at
+ DQUOTE -> quoted_before_at
+ # Local part only : >=1 characters will suffice, which we've already
+ # matched.
+ -> out
+ AT -> start_of_domain
+
+ STATE start_of_domain
+ LEFTSQUARE -> dotted_quad
+ DOMAIN -> after_at
+
+ STATE dotted_quad
+ DIGIT | PERIOD -> dotted_quad
+ RIGHTSQUARE -> out
+
+ STATE after_at
+ DOMAIN -> after_at, out
+
+}
+
+BLOCK angled_email {
+ STATE in
+ LT -> in_angles
+
+ STATE in_angles
+ <email:in->out> -> before_gt
+
+ STATE before_gt
+ GT -> out
+}
+
+BLOCK zone {
+ # Make this pretty lenient
+ STATE in
+ UPPER -> zone2
+ UPPER -> out
+ PLUSMINUS -> zone2
+
+ STATE zone2
+ UPPER | LOWER -> zone2, out
+ DIGIT -> zone2, out
+}
+
+BLOCK date {
+ STATE in
+ WHITE -> in, before_weekday
+
+ STATE before_weekday
+ UPPER ; LOWER ; LOWER ; WHITE -> after_weekday
+
+ STATE after_weekday
+ WHITE -> after_weekday
+ UPPER ; LOWER ; LOWER ; WHITE -> after_month
+
+ STATE after_month
+ WHITE -> after_month
+ DIGIT ; WHITE -> after_day
+ DIGIT ; DIGIT ; WHITE -> after_day
+
+ STATE after_day
+ WHITE -> after_day
+ # Accept HH:MM:SS
+ DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+ # Accept HH:MM
+ DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+
+ # Allow either 1 or 2 words of timezone
+ STATE after_time
+ WHITE -> after_time
+ -> after_timezone
+ <zone:in->out> ; WHITE -> after_timezone
+ <zone:in->out> ; WHITE -> after_timezone_1
+
+ # It appears that Pine puts the timezone after the year
+ DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone
+
+ STATE after_year_before_zone
+ WHITE -> after_year_before_zone
+ <zone:in->out> -> after_timezone_after_year
+ <zone:in->out> ; WHITE -> after_timezone_after_year_1
+
+ STATE after_timezone_after_year_1
+ WHITE -> after_timezone_after_year_1
+ <zone:in->out> -> after_timezone_after_year
+
+ STATE after_timezone_after_year
+ WHITE -> after_timezone_after_year
+ -> out
+
+ STATE after_timezone_1
+ WHITE -> after_timezone_1
+ <zone:in->out> ; WHITE -> after_timezone
+
+ STATE after_timezone
+ WHITE -> after_timezone
+ DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year
+
+ STATE after_year
+ WHITE -> after_year
+ -> out
+
+}
+
+# Assume the earlier code has identified the '\nFrom ' sequence,
+# and the validator starts scanning from the character beyond the space
+
+BLOCK main {
+
+ STATE in
+ # Real return address.
+ WHITE -> in
+ <email:in->out> -> before_date
+ <angled_email:in->out> -> before_date
+
+ # Cope with Mozilla mbox folder format which just uses a '-' as
+ # the return address field.
+ PLUSMINUS -> before_date
+
+ # Empty return address
+ -> before_date
+
+ STATE before_date
+ <date:in->out> ; LF = FROMCHECK_PASS
+
+ # Cope with mozilla mbox format
+ <date:in->out> ; CR ; LF = FROMCHECK_PASS
+
+ # Mention this state last : the last mentioned state in the last defined
+ # block becomes the entry state of the scanner.
+
+ STATE in
+
+}
+
+ATTR FROMCHECK_PASS
+ATTR FROMCHECK_FAIL
+DEFATTR FROMCHECK_FAIL
+PREFIX fromcheck
+TYPE "enum fromcheck_result"
+
+# vim:ft=txt:et:sw=4:sts=4:ht=4