summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Fenk <fenk@users.sourceforge.net>2000-08-10 19:11:21 +0000
committerRobert Fenk <fenk@users.sourceforge.net>2000-08-10 19:11:21 +0000
commitbe30e651ac7c8f40dfca71899956c44bfead8fe2 (patch)
treef1c6c0a9924eb798226be1dc0c37e98d2bd69433
parent4c993a4d380a7f2be2ab9770e09a138606a6e727 (diff)
(bbdb-snarf-nice-real-name-regexp): regexp
matching unwanted characters used by (bbdb-snarf-nice-real-name): removes unwanted characters from real names/email addresses. (bbdb-extract-address-component-regexps): alist of regexps and transformation-instructions used by (bbdb-extract-address-components): is for the extraction of full name and email address from headers. This function is a bit more configurable than `mail-extract-address-components' and it will return a list of all found address components. Some note about why to use this extract-function. `bbdb-show-all-recipients' uses `bbdb-split' ,but this will not work for email addresses like "Boss, Hugo" <hugo@boss.com> or hugop@boss.com (Hugo Boss) On the otherside `mail-extr.el' returns just the first mail address, but we may be interrested also in the other addresses within one header. Furthermore this function is configureable to do smart parsing ... Files: lisp/bbdb-snarf.el
-rw-r--r--lisp/bbdb-snarf.el186
1 files changed, 186 insertions, 0 deletions
diff --git a/lisp/bbdb-snarf.el b/lisp/bbdb-snarf.el
index e6ebd4e..07515e2 100644
--- a/lisp/bbdb-snarf.el
+++ b/lisp/bbdb-snarf.el
@@ -384,4 +384,190 @@ more details."
;; return
new-record)
+;;----------------------------------------------------------------------------
+;; Emacs 20.3 seems to miss the function replace-in-string?
+(if (not (functionp 'replace-in-string))
+ ;; actually this is dired-replace-in-string slightly modified
+ (defun replace-in-string (string regexp newtext &optional literal)
+ ;; Replace REGEXP with NEWTEXT everywhere in STRING and return result.
+ ;; NEWTEXT is taken literally---no \\DIGIT escapes will be recognized.
+ (let ((result "") (start 0) mb me)
+ (while (string-match regexp string start)
+ (setq mb (match-beginning 0)
+ me (match-end 0)
+ result (concat result (substring string start mb) newtext)
+ start me))
+ (concat result (substring string start)))))
+
+(defcustom bbdb-snarf-nice-real-name-regexp "[._,\t\n ]+"
+ "*Regexp matching string which `bbdb-wash-address' will replaced by
+a space."
+ :group 'bbdb-noticing-records
+ :type 'string)
+
+(defun bbdb-snarf-nice-real-name (str)
+ "Removes unwanted characters form STR in order to get a nice full name.
+Remove any unwanted characters specifyed by `bbdb-wash-address-regexp',
+capitalize words and change order of names when separated by comma."
+ (if str
+ (progn
+ (if (string-match "^\\([^,]+\\)\\s-*,\\s-*\\([^,]+\\)$" str)
+ (setq str (concat (match-string 2 str) " "
+ (match-string 1 str))))
+ (capitalize (replace-in-string str
+ bbdb-snarf-nice-real-name-regexp
+ " ")))
+ nil))
+
+(defcustom bbdb-extract-address-component-regexps
+ '(;; "'surname, firstname'" <address> from Outlookers
+ ("\"'\\([^\"]*\\)'\"\\s-*<\\([^>]+\\)>"
+ (bbdb-snarf-nice-real-name (match-string 1 adstring)) 2)
+ ;; "name" <address>
+ ("\"\\([^\"]*\\)\"\\s-*<\\([^>]+\\)>"
+ 1 2)
+ ;; name <address>
+ ("\\(\\b[^<\",]*\\b\\)\\s-*<\\([^>]+\\)>"
+ 1 2)
+ ;; name <address>
+ ("<\\([^>]+\\)>" nil 2)
+ ;; address (name)
+ ("\\(\\b[^<\",()]+\\b\\)\\s-*(\\([^)]+\\))"
+ 2 1)
+ ;; user@host
+ ("\\b\\(\\([0-9a-z._-]+\\)@[0-9a-z._-]+\\)\\b"
+ nil 1)
+ ;; local address
+ ("\\b\\([0-9a-z._-]+\\)\\b"
+ nil 1)
+ )
+ "*List of regexps matching headers.
+Each list element should have the form (REGEXP FULLNAME ADDRESS), where
+REGEXP matches the address while the actual address components should
+be a parenthesized expression.
+
+FULLNAME is a default string for addresses without full name or a
+number denoting parenthesized expression.
+ADDRESS is a number denoting the parenthesized expression matching the
+address.
+
+If FULLNAME or ADDRESS is a list it will be evaluated to return a
+string or nil. If its a function it will be called with the remaining
+address-string as argument."
+ :group 'bbdb-noticing-records
+ :type 'list)
+
+(defcustom bbdb-extract-address-component-ignore-regexp
+ "\\(\\(undisclosed\\|unlisted\\)[^,]*recipients\\)\\|no To-header on input"
+ "*A regexp matching addresses which should be ignored."
+ :group 'bbdb-noticing-records
+ :type 'string)
+
+(defcustom bbdb-extract-address-component-handler 'message
+ "*Specifies how `bbdb-extract-address-components' reports errors.
+
+A value of nil means ignore unparsable stuff and 'warn will report
+a warning, 'message will report a message in the minibuffer and all
+other value will fire a error.
+
+When set to a function it will be called with the remaining string in
+order to extract the address components and return the rest and the
+components as list or to do what ever it wants, e.g. send a complain
+to the author ...
+
+To skip known unpareable stuff you rather should set the variable
+`bbdb-extract-address-component-ignore-regexp' instead of disabling
+this handler."
+ :group 'bbdb-noticing-records)
+
+;;;###autoload
+(defun bbdb-extract-address-components (adstring)
+ "Return a list of address components found in ADSTRING.
+If extracting fails one probably has to adjust the variable
+`bbdb-extract-address-component-regexps'."
+ (let ((case-fold-search t)
+ (fnadlist nil)
+ adcom-regexp
+ nomatch)
+
+ ;; Do some string cleanup and trimming
+ (setq adstring (replace-in-string adstring "[\n\t]" " "))
+ (setq adstring (replace-in-string adstring " " " "))
+
+ ;; scan the string
+ (while (not (string= "" adstring))
+ (setq adcom-regexp bbdb-extract-address-component-regexps
+ nomatch t)
+ (while adcom-regexp
+ (let ((regexp (caar adcom-regexp))
+ (fn (cadar adcom-regexp))
+ (ad (caddar adcom-regexp)))
+ (cond ((string-match
+ (concat "^[^,]*\\("
+ bbdb-extract-address-component-ignore-regexp
+ "\\)[^,]*\\(,\\|$\\)")
+ adstring)
+ (setq adstring (substring adstring (match-end 0))
+ adcom-regexp nil
+ nomatch nil))
+ ((string-match (concat "^\\s-*" regexp "\\s-*\\(,\\|$\\)")
+ adstring)
+
+ (add-to-list 'fnadlist
+ (list (cond ((numberp fn)
+ (match-string fn adstring))
+ ((listp fn)
+ (save-match-data (eval fn)))
+ ((functionp fn)
+ (save-match-data
+ (funcall fn adstring)))
+ (t fn))
+ (cond ((numberp ad)
+ (match-string ad adstring))
+ ((listp ad)
+ (save-match-data (eval ad)))
+ ((functionp ad)
+ (save-match-data
+ (funcall ad adstring)))
+ (t ad))))
+ (setq adstring (substring adstring (match-end 0))
+ adcom-regexp nil
+ nomatch nil)))
+ (setq adcom-regexp (cdr adcom-regexp))))
+
+ ;; Now handle problems
+ (if nomatch
+ (cond ((equal bbdb-extract-address-component-handler nil))
+ ((equal bbdb-extract-address-component-handler 'warn)
+ (warn "Cannot extract an address component at \"%s\".
+See `bbdb-extract-address-component-handler' for more information."
+ adstring))
+ ((equal bbdb-extract-address-component-handler 'message)
+ (message "Cannot extract an address component at \"%s\"."
+ adstring))
+ ((functionp bbdb-extract-address-component-handler)
+ (let ((result
+ (funcall bbdb-extract-address-component-handler
+ adstring)))
+ (if (and (listp result) (= 3 (length result))
+ (stringp (car result))
+ (stringp (cadr result))
+ (stringp (caddr result)))
+ (progn (add-to-list 'fnadlist (cdr result))
+ (setq adstring (car result)
+ nomatch nil)))))
+ (t
+ (error "Cannot extract an address component at \"%30s\""
+ adstring))))
+
+ ;; ignore the bad junk
+ (if nomatch
+ (if (string-match "^[^,]*," adstring)
+ (setq adstring (substring adstring (match-end 0)))
+ (setq adstring ""))))
+
+ (delete '(nil nil) (nreverse fnadlist))))
+
+;;----------------------------------------------------------------------------
+
(provide 'bbdb-snarf)