[ Index ]

PHP Cross Reference of phpwcms V1.5.0 _r431 (28.01.12)

title

Body

[close]

/include/inc_ext/ -> rfc822.php (source)

   1  <?php
   2  
   3      #
   4      # RFC 822/2822/5322 Email Parser
   5      #
   6      # By Cal Henderson <cal@iamcal.com>
   7      #
   8      # This code is dual licensed:
   9      # CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
  10      # GPLv3 - http://www.gnu.org/copyleft/gpl.html
  11      #
  12      # $Revision$
  13      #
  14  
  15      ##################################################################################
  16  
  17  	function is_valid_email_address($email, $options=array()){
  18  
  19          #
  20          # you can pass a few different named options as a second argument,
  21          # but the defaults are usually a good choice.
  22          #
  23  
  24          $defaults = array(
  25              'allow_comments'    => true,
  26              'public_internet'    => true, # turn this off for 'strict' mode
  27          );
  28  
  29          $opts = array();
  30          foreach ($defaults as $k => $v) $opts[$k] = isset($options[$k]) ? $options[$k] : $v;
  31          $options = $opts;
  32          
  33  
  34  
  35          ####################################################################################
  36          #
  37          # NO-WS-CTL       =       %d1-8 /         ; US-ASCII control characters
  38          #                         %d11 /          ;  that do not include the
  39          #                         %d12 /          ;  carriage return, line feed,
  40          #                         %d14-31 /       ;  and white space characters
  41          #                         %d127
  42          # ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
  43          # DIGIT          =  %x30-39
  44  
  45          $no_ws_ctl    = "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
  46          $alpha        = "[\\x41-\\x5a\\x61-\\x7a]";
  47          $digit        = "[\\x30-\\x39]";
  48          $cr        = "\\x0d";
  49          $lf        = "\\x0a";
  50          $crlf        = "(?:$cr$lf)";
  51  
  52  
  53          ####################################################################################
  54          #
  55          # obs-char        =       %d0-9 / %d11 /          ; %d0-127 except CR and
  56          #                         %d12 / %d14-127         ;  LF
  57          # obs-text        =       *LF *CR *(obs-char *LF *CR)
  58          # text            =       %d1-9 /         ; Characters excluding CR and LF
  59          #                         %d11 /
  60          #                         %d12 /
  61          #                         %d14-127 /
  62          #                         obs-text
  63          # obs-qp          =       "\" (%d0-127)
  64          # quoted-pair     =       ("\" text) / obs-qp
  65  
  66          $obs_char    = "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
  67          $obs_text    = "(?:$lf*$cr*(?:$obs_char$lf*$cr*)*)";
  68          $text        = "(?:[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]|$obs_text)";
  69  
  70          #
  71          # there's an issue with the definition of 'text', since 'obs_text' can
  72          # be blank and that allows qp's with no character after the slash. we're
  73          # treating that as bad, so this just checks we have at least one
  74          # (non-CRLF) character
  75          #
  76  
  77          $text        = "(?:$lf*$cr*$obs_char$lf*$cr*)";
  78          $obs_qp        = "(?:\\x5c[\\x00-\\x7f])";
  79          $quoted_pair    = "(?:\\x5c$text|$obs_qp)";
  80  
  81  
  82          ####################################################################################
  83          #
  84          # obs-FWS         =       1*WSP *(CRLF 1*WSP)
  85          # FWS             =       ([*WSP CRLF] 1*WSP) /   ; Folding white space
  86          #                         obs-FWS
  87          # ctext           =       NO-WS-CTL /     ; Non white space controls
  88          #                         %d33-39 /       ; The rest of the US-ASCII
  89          #                         %d42-91 /       ;  characters not including "(",
  90          #                         %d93-126        ;  ")", or "\"
  91          # ccontent        =       ctext / quoted-pair / comment
  92          # comment         =       "(" *([FWS] ccontent) [FWS] ")"
  93          # CFWS            =       *([FWS] comment) (([FWS] comment) / FWS)
  94  
  95          #
  96          # note: we translate ccontent only partially to avoid an infinite loop
  97          # instead, we'll recursively strip *nested* comments before processing
  98          # the input. that will leave 'plain old comments' to be matched during
  99          # the main parse.
 100          #
 101  
 102          $wsp        = "[\\x20\\x09]";
 103          $obs_fws    = "(?:$wsp+(?:$crlf$wsp+)*)";
 104          $fws        = "(?:(?:(?:$wsp*$crlf)?$wsp+)|$obs_fws)";
 105          $ctext        = "(?:$no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
 106          $ccontent    = "(?:$ctext|$quoted_pair)";
 107          $comment    = "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";
 108          $cfws        = "(?:(?:$fws?$comment)*(?:$fws?$comment|$fws))";
 109  
 110  
 111          #
 112          # these are the rules for removing *nested* comments. we'll just detect
 113          # outer comment and replace it with an empty comment, and recurse until
 114          # we stop.
 115          #
 116  
 117          $outer_ccontent_dull    = "(?:$fws?$ctext|$quoted_pair)";
 118          $outer_ccontent_nest    = "(?:$fws?$comment)";
 119          $outer_comment        = "(?:\\x28$outer_ccontent_dull*(?:$outer_ccontent_nest$outer_ccontent_dull*)+$fws?\\x29)";
 120  
 121  
 122          ####################################################################################
 123          #
 124          # atext           =       ALPHA / DIGIT / ; Any character except controls,
 125          #                         "!" / "#" /     ;  SP, and specials.
 126          #                         "$" / "%" /     ;  Used for atoms
 127          #                         "&" / "'" /
 128          #                         "*" / "+" /
 129          #                         "-" / "/" /
 130          #                         "=" / "?" /
 131          #                         "^" / "_" /
 132          #                         "`" / "{" /
 133          #                         "|" / "}" /
 134          #                         "~"
 135          # atom            =       [CFWS] 1*atext [CFWS]
 136  
 137          $atext        = "(?:$alpha|$digit|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2f\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
 138          $atom        = "(?:$cfws?(?:$atext)+$cfws?)";
 139  
 140  
 141          ####################################################################################
 142          #
 143          # qtext           =       NO-WS-CTL /     ; Non white space controls
 144          #                         %d33 /          ; The rest of the US-ASCII
 145          #                         %d35-91 /       ;  characters not including "\"
 146          #                         %d93-126        ;  or the quote character
 147          # qcontent        =       qtext / quoted-pair
 148          # quoted-string   =       [CFWS]
 149          #                         DQUOTE *([FWS] qcontent) [FWS] DQUOTE
 150          #                         [CFWS]
 151          # word            =       atom / quoted-string
 152  
 153          $qtext        = "(?:$no_ws_ctl|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
 154          $qcontent    = "(?:$qtext|$quoted_pair)";
 155          $quoted_string    = "(?:$cfws?\\x22(?:$fws?$qcontent)*$fws?\\x22$cfws?)";
 156  
 157          #
 158          # changed the '*' to a '+' to require that quoted strings are not empty
 159          #
 160  
 161          $quoted_string    = "(?:$cfws?\\x22(?:$fws?$qcontent)+$fws?\\x22$cfws?)";
 162          $word        = "(?:$atom|$quoted_string)";
 163  
 164  
 165          ####################################################################################
 166          #
 167          # obs-local-part  =       word *("." word)
 168          # obs-domain      =       atom *("." atom)
 169  
 170          $obs_local_part    = "(?:$word(?:\\x2e$word)*)";
 171          $obs_domain    = "(?:$atom(?:\\x2e$atom)*)";
 172  
 173  
 174          ####################################################################################
 175          #
 176          # dot-atom-text   =       1*atext *("." 1*atext)
 177          # dot-atom        =       [CFWS] dot-atom-text [CFWS]
 178  
 179          $dot_atom_text    = "(?:$atext+(?:\\x2e$atext+)*)";
 180          $dot_atom    = "(?:$cfws?$dot_atom_text$cfws?)";
 181  
 182  
 183          ####################################################################################
 184          #
 185          # domain-literal  =       [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
 186          # dcontent        =       dtext / quoted-pair
 187          # dtext           =       NO-WS-CTL /     ; Non white space controls
 188          # 
 189          #                         %d33-90 /       ; The rest of the US-ASCII
 190          #                         %d94-126        ;  characters not including "[",
 191          #                                         ;  "]", or "\"
 192  
 193          $dtext        = "(?:$no_ws_ctl|[\\x21-\\x5a\\x5e-\\x7e])";
 194          $dcontent    = "(?:$dtext|$quoted_pair)";
 195          $domain_literal    = "(?:$cfws?\\x5b(?:$fws?$dcontent)*$fws?\\x5d$cfws?)";
 196  
 197  
 198          ####################################################################################
 199          #
 200          # local-part      =       dot-atom / quoted-string / obs-local-part
 201          # domain          =       dot-atom / domain-literal / obs-domain
 202          # addr-spec       =       local-part "@" domain
 203  
 204          $local_part    = "(($dot_atom)|($quoted_string)|($obs_local_part))";
 205          $domain        = "(($dot_atom)|($domain_literal)|($obs_domain))";
 206          $addr_spec    = "$local_part\\x40$domain";
 207  
 208  
 209  
 210          #
 211          # this was previously 256 based on RFC3696, but dominic's errata was accepted.
 212          #
 213  
 214          if (strlen($email) > 254) return 0;
 215  
 216  
 217          #
 218          # we need to strip nested comments first - we replace them with a simple comment
 219          #
 220  
 221          if ($options['allow_comments']){
 222  
 223              $email = email_strip_comments($outer_comment, $email, "(x)");
 224          }
 225  
 226  
 227          #
 228          # now match what's left
 229          #
 230  
 231          if (!preg_match("!^$addr_spec$!", $email, $m)){
 232  
 233              return 0;
 234          }
 235  
 236          $bits = array(
 237              'local'            => isset($m[1]) ? $m[1] : '',
 238              'local-atom'        => isset($m[2]) ? $m[2] : '',
 239              'local-quoted'        => isset($m[3]) ? $m[3] : '',
 240              'local-obs'        => isset($m[4]) ? $m[4] : '',
 241              'domain'        => isset($m[5]) ? $m[5] : '',
 242              'domain-atom'        => isset($m[6]) ? $m[6] : '',
 243              'domain-literal'    => isset($m[7]) ? $m[7] : '',
 244              'domain-obs'        => isset($m[8]) ? $m[8] : '',
 245          );
 246  
 247  
 248          #
 249          # we need to now strip comments from $bits[local] and $bits[domain],
 250          # since we know they're in the right place and we want them out of the
 251          # way for checking IPs, label sizes, etc
 252          #
 253  
 254          if ($options['allow_comments']){
 255              $bits['local']    = email_strip_comments($comment, $bits['local']);
 256              $bits['domain']    = email_strip_comments($comment, $bits['domain']);
 257          }
 258  
 259  
 260          #
 261          # length limits on segments
 262          #
 263  
 264          if (strlen($bits['local']) > 64) return 0;
 265          if (strlen($bits['domain']) > 255) return 0;
 266  
 267  
 268          #
 269          # restrictions on domain-literals from RFC2821 section 4.1.3
 270          #
 271          # RFC4291 changed the meaning of :: in IPv6 addresses - i can mean one or
 272          # more zero groups (updated from 2 or more).
 273          #
 274  
 275          if (strlen($bits['domain-literal'])){
 276  
 277              $Snum            = "(\d{1,3})";
 278              $IPv4_address_literal    = "$Snum\.$Snum\.$Snum\.$Snum";
 279  
 280              $IPv6_hex        = "(?:[0-9a-fA-F]{1,4})";
 281  
 282              $IPv6_full        = "IPv6\:$IPv6_hex(?:\:$IPv6_hex){7}";
 283  
 284              $IPv6_comp_part        = "(?:$IPv6_hex(?:\:$IPv6_hex){0,7})?";
 285              $IPv6_comp        = "IPv6\:($IPv6_comp_part\:\:$IPv6_comp_part)";
 286  
 287              $IPv6v4_full        = "IPv6\:$IPv6_hex(?:\:$IPv6_hex){5}\:$IPv4_address_literal";
 288  
 289              $IPv6v4_comp_part    = "$IPv6_hex(?:\:$IPv6_hex){0,5}";
 290              $IPv6v4_comp        = "IPv6\:((?:$IPv6v4_comp_part)?\:\:(?:$IPv6v4_comp_part\:)?)$IPv4_address_literal";
 291  
 292  
 293              #
 294              # IPv4 is simple
 295              #
 296  
 297              if (preg_match("!^\[$IPv4_address_literal\]$!", $bits['domain'], $m)){
 298  
 299                  if (intval($m[1]) > 255) return 0;
 300                  if (intval($m[2]) > 255) return 0;
 301                  if (intval($m[3]) > 255) return 0;
 302                  if (intval($m[4]) > 255) return 0;
 303  
 304              }else{
 305  
 306                  #
 307                  # this should be IPv6 - a bunch of tests are needed here :)
 308                  #
 309  
 310                  while (1){
 311  
 312                      if (preg_match("!^\[$IPv6_full\]$!", $bits['domain'])){
 313                          break;
 314                      }
 315  
 316                      if (preg_match("!^\[$IPv6_comp\]$!", $bits['domain'], $m)){
 317                          list($a, $b) = explode('::', $m[1]);
 318                          $folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
 319                          $groups = explode(':', $folded);
 320                          if (count($groups) > 7) return 0;
 321                          break;
 322                      }
 323  
 324                      if (preg_match("!^\[$IPv6v4_full\]$!", $bits['domain'], $m)){
 325  
 326                          if (intval($m[1]) > 255) return 0;
 327                          if (intval($m[2]) > 255) return 0;
 328                          if (intval($m[3]) > 255) return 0;
 329                          if (intval($m[4]) > 255) return 0;
 330                          break;
 331                      }
 332  
 333                      if (preg_match("!^\[$IPv6v4_comp\]$!", $bits['domain'], $m)){
 334                          list($a, $b) = explode('::', $m[1]);
 335                          $b = substr($b, 0, -1); # remove the trailing colon before the IPv4 address
 336                          $folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
 337                          $groups = explode(':', $folded);
 338                          if (count($groups) > 5) return 0;
 339                          break;
 340                      }
 341  
 342                      return 0;
 343                  }
 344              }            
 345          }else{
 346  
 347              #
 348              # the domain is either dot-atom or obs-domain - either way, it's
 349              # made up of simple labels and we split on dots
 350              #
 351  
 352              $labels = explode('.', $bits['domain']);
 353  
 354  
 355              #
 356              # this is allowed by both dot-atom and obs-domain, but is un-routeable on the
 357              # public internet, so we'll fail it (e.g. user@localhost)
 358              #
 359  
 360              if ($options['public_internet']){
 361                  if (count($labels) == 1) return 0;
 362              }
 363  
 364  
 365              #
 366              # checks on each label
 367              #
 368  
 369              foreach ($labels as $label){
 370  
 371                  if (strlen($label) > 63) return 0;
 372                  if (substr($label, 0, 1) == '-') return 0;
 373                  if (substr($label, -1) == '-') return 0;
 374              }
 375  
 376  
 377              #
 378              # last label can't be all numeric
 379              #
 380  
 381              if ($options['public_internet']){
 382                  if (preg_match('!^[0-9]+$!', array_pop($labels))) return 0;
 383              }
 384          }
 385  
 386  
 387          return 1;
 388      }
 389  
 390      ##################################################################################
 391  
 392  	function email_strip_comments($comment, $email, $replace=''){
 393  
 394          while (1){
 395              $new = preg_replace("!$comment!", $replace, $email);
 396              if (strlen($new) == strlen($email)){
 397                  return $email;
 398              }
 399              $email = $new;
 400          }
 401      }
 402  
 403      ##################################################################################
 404  ?>


Generated: Sun Jan 29 16:31:14 2012 Cross-referenced by PHPXref 0.7.1