diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2016-08-18 07:57:33 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2016-08-18 07:57:33 -0700 |
commit | a48a7d6bba5d8030ef2aba8419fbabf55f7831ba (patch) | |
tree | 87fed0fce7a199a927b2cb7b1ddeeebe94350b00 /fromcheck.nfa |
Imported Upstream version 0.23+git20131125
Diffstat (limited to 'fromcheck.nfa')
-rw-r--r-- | fromcheck.nfa | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/fromcheck.nfa b/fromcheck.nfa
new file mode 100644
index 0000000..40f3996
--- /dev/null
+++ b/fromcheck.nfa
@@ -0,0 +1,218 @@
+#########################################################################
+#
+# mairix - message index builder and finder for maildir folders.
+#
+# Copyright (C) Richard P. Curnow 2002-2004,2006
+# Copyright (C) Jonathan Kamens 2010
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# =======================================================================
+
+%{
+#include "from.h"
+%}
+
+
+# Define tokens
+# CR : \n
+# DIGIT : [0-9]
+# AT : @
+# COLON : :
+# WHITE : ' ', \t
+# LOWER : [a-z]
+# UPPER : [A-Z]
+# PLUSMINUS : [+-]
+# OTHER_EMAIL : other stuff valid in the LHS of an address
+# DOMAIN : stuff valid in the RHS of an address
+
+Abbrev LF = [\n]
+Abbrev CR = [\r]
+Abbrev DIGIT = [0-9]
+Abbrev PERIOD = [.]
+Abbrev AT = [@]
+Abbrev LOWER = [a-z]
+Abbrev UPPER = [A-Z]
+Abbrev COLON = [:]
+Abbrev WHITE = [ \t]
+Abbrev PLUSMINUS = [+\-]
+# Explained clearly at
+# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
+Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
+Abbrev LT = [<]
+Abbrev GT = [>]
+Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
+Abbrev OTHER_DOMAIN = [\-_.]
+Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN
+Abbrev DQUOTE = ["]
+Abbrev OTHER_QUOTED = [@:<>]
+Abbrev LEFTSQUARE = [[]
+Abbrev RIGHTSQUARE = [\]]
+
+BLOCK email {
+    STATE in
+        EMAIL -> in, before_at
+        DQUOTE -> quoted_before_at
+        AT -> domain_route
+
+    STATE domain_route
+        DOMAIN -> domain_route
+        COLON -> in
+
+    STATE quoted_before_at
+        EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
+        DQUOTE -> before_at
+
+    STATE before_at
+        EMAIL -> before_at
+        DQUOTE -> quoted_before_at
+        # Local part only : >=1 characters will suffice, which we've already
+        # matched.
+        -> out
+        AT -> start_of_domain
+
+    STATE start_of_domain
+        LEFTSQUARE -> dotted_quad
+        DOMAIN -> after_at
+
+    STATE dotted_quad
+        DIGIT | PERIOD -> dotted_quad
+        RIGHTSQUARE -> out
+
+    STATE after_at
+        DOMAIN -> after_at, out
+
+}
+
+BLOCK angled_email {
+    STATE in
+        LT -> in_angles
+
+    STATE in_angles
+        <email:in->out> -> before_gt
+
+    STATE before_gt
+        GT -> out
+}
+
+BLOCK zone {
+    # Make this pretty lenient
+    STATE in
+        UPPER -> zone2
+        UPPER -> out
+        PLUSMINUS -> zone2
+
+    STATE zone2
+        UPPER | LOWER -> zone2, out
+        DIGIT -> zone2, out
+}
+
+BLOCK date {
+    STATE in
+        WHITE -> in, before_weekday
+
+    STATE before_weekday
+        UPPER ; LOWER ; LOWER ; WHITE -> after_weekday
+
+    STATE after_weekday
+        WHITE -> after_weekday
+        UPPER ; LOWER ; LOWER ; WHITE -> after_month
+
+    STATE after_month
+        WHITE -> after_month
+        DIGIT ; WHITE -> after_day
+        DIGIT ; DIGIT ; WHITE -> after_day
+
+    STATE after_day
+        WHITE -> after_day
+        # Accept HH:MM:SS
+        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+        # Accept HH:MM
+        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
+
+    # Allow either 1 or 2 words of timezone
+    STATE after_time
+        WHITE -> after_time
+        -> after_timezone
+        <zone:in->out> ; WHITE -> after_timezone
+        <zone:in->out> ; WHITE -> after_timezone_1
+
+        # It appears that Pine puts the timezone after the year
+        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone
+
+    STATE after_year_before_zone
+        WHITE -> after_year_before_zone
+        <zone:in->out> -> after_timezone_after_year
+        <zone:in->out> ; WHITE -> after_timezone_after_year_1
+
+    STATE after_timezone_after_year_1
+        WHITE -> after_timezone_after_year_1
+        <zone:in->out> -> after_timezone_after_year
+
+    STATE after_timezone_after_year
+        WHITE -> after_timezone_after_year
+        -> out
+
+    STATE after_timezone_1
+        WHITE -> after_timezone_1
+        <zone:in->out> ; WHITE -> after_timezone
+
+    STATE after_timezone
+        WHITE -> after_timezone
+        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year
+
+    STATE after_year
+        WHITE -> after_year
+        -> out
+
+}
+
+# Assume the earlier code has identified the '\nFrom ' sequence,
+# and the validator starts scanning from the character beyond the space
+
+BLOCK main {
+
+    STATE in
+        # Real return address.
+        WHITE -> in
+        <email:in->out> -> before_date
+        <angled_email:in->out> -> before_date
+
+        # Cope with Mozilla mbox folder format which just uses a '-' as
+        # the return address field.
+        PLUSMINUS -> before_date
+
+        # Empty return address
+        -> before_date
+
+    STATE before_date
+        <date:in->out> ; LF = FROMCHECK_PASS
+
+        # Cope with mozilla mbox format
+        <date:in->out> ; CR ; LF = FROMCHECK_PASS
+
+    # Mention this state last : the last mentioned state in the last defined
+    # block becomes the entry state of the scanner.
+
+    STATE in
+
+}
+
+ATTR FROMCHECK_PASS
+ATTR FROMCHECK_FAIL
+DEFATTR FROMCHECK_FAIL
+PREFIX fromcheck
+TYPE "enum fromcheck_result"
+
+# vim:ft=txt:et:sw=4:sts=4:ht=4