[ Index ] |
PHP Cross Reference of phpwcms V1.5.0 _r431 (28.01.12) |
[Summary view] [Print] [Text view]
<?php

	#
	# RFC 822/2822/5322 Email Parser
	#
	# By Cal Henderson <cal@iamcal.com>
	#
	# This code is dual licensed:
	# CC Attribution-ShareAlike 2.5 - http://creativecommons.org/licenses/by-sa/2.5/
	# GPLv3 - http://www.gnu.org/copyleft/gpl.html
	#
	# $Revision$
	#

	##################################################################################

	#
	# Checks whether $email is a syntactically valid addr-spec (local-part "@" domain)
	# and applies a number of additional length / label / IP-literal restrictions.
	#
	# $email   - the address to check, as a string.
	# $options - optional array of named options; unknown keys are ignored:
	#            'allow_comments'  (default true)  when true, nested comments are
	#                              collapsed before matching and comments are removed
	#                              from the local part and domain before the length,
	#                              label and IP-literal checks are run.
	#            'public_internet' (default true)  when true, single-label domains
	#                              (e.g. user@localhost) and domains whose last label
	#                              is all-numeric are rejected.
	#
	# Returns 1 if the address passes every check, 0 otherwise.
	#

	function is_valid_email_address($email, $options=array()){

		#
		# you can pass a few different named options as a second argument,
		# but the defaults are usually a good choice.
		#

		$defaults = array(
			'allow_comments'	=> true,
			'public_internet'	=> true, # turn this off for 'strict' mode
		);

		$opts = array();
		foreach ($defaults as $k => $v) $opts[$k] = isset($options[$k]) ? $options[$k] : $v;
		$options = $opts;



		####################################################################################
		#
		# NO-WS-CTL       =       %d1-8 /         ; US-ASCII control characters
		#                         %d11 /          ;  that do not include the
		#                         %d12 /          ;  carriage return, line feed,
		#                         %d14-31 /       ;  and white space characters
		#                         %d127
		# ALPHA          =  %x41-5A / %x61-7A   ; A-Z / a-z
		# DIGIT          =  %x30-39

		$no_ws_ctl	= "[\\x01-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]";
		$alpha		= "[\\x41-\\x5a\\x61-\\x7a]";
		$digit		= "[\\x30-\\x39]";
		$cr		= "\\x0d";
		$lf		= "\\x0a";
		$crlf		= "(?:$cr$lf)";


		####################################################################################
		#
		# obs-char        =       %d0-9 / %d11 /          ; %d0-127 except CR and
		#                         %d12 / %d14-127         ;  LF
		# obs-text        =       *LF *CR *(obs-char *LF *CR)
		# text            =       %d1-9 /         ; Characters excluding CR and LF
		#                         %d11 /
		#                         %d12 /
		#                         %d14-127 /
		#                         obs-text
		# obs-qp          =       "\" (%d0-127)
		# quoted-pair     =       ("\" text) / obs-qp

		$obs_char	= "[\\x00-\\x09\\x0b\\x0c\\x0e-\\x7f]";
		$obs_text	= "(?:$lf*$cr*(?:$obs_char$lf*$cr*)*)";
		$text		= "(?:[\\x01-\\x09\\x0b\\x0c\\x0e-\\x7f]|$obs_text)";

		#
		# there's an issue with the definition of 'text', since 'obs_text' can
		# be blank and that allows qp's with no character after the slash. we're
		# treating that as bad, so this just checks we have at least one
		# (non-CRLF) character. this deliberately overrides the literal
		# translation of 'text' above.
		#

		$text		= "(?:$lf*$cr*$obs_char$lf*$cr*)";
		$obs_qp		= "(?:\\x5c[\\x00-\\x7f])";
		$quoted_pair	= "(?:\\x5c$text|$obs_qp)";


		####################################################################################
		#
		# obs-FWS         =       1*WSP *(CRLF 1*WSP)
		# FWS             =       ([*WSP CRLF] 1*WSP) /   ; Folding white space
		#                         obs-FWS
		# ctext           =       NO-WS-CTL /     ; Non white space controls
		#                         %d33-39 /       ; The rest of the US-ASCII
		#                         %d42-91 /       ;  characters not including "(",
		#                         %d93-126        ;  ")", or "\"
		# ccontent        =       ctext / quoted-pair / comment
		# comment         =       "(" *([FWS] ccontent) [FWS] ")"
		# CFWS            =       *([FWS] comment) (([FWS] comment) / FWS)

		#
		# note: we translate ccontent only partially to avoid an infinite loop
		# instead, we'll recursively strip *nested* comments before processing
		# the input. that will leave 'plain old comments' to be matched during
		# the main parse.
		#

		$wsp		= "[\\x20\\x09]";
		$obs_fws	= "(?:$wsp+(?:$crlf$wsp+)*)";
		$fws		= "(?:(?:(?:$wsp*$crlf)?$wsp+)|$obs_fws)";
		$ctext		= "(?:$no_ws_ctl|[\\x21-\\x27\\x2A-\\x5b\\x5d-\\x7e])";
		$ccontent	= "(?:$ctext|$quoted_pair)";
		$comment	= "(?:\\x28(?:$fws?$ccontent)*$fws?\\x29)";
		$cfws		= "(?:(?:$fws?$comment)*(?:$fws?$comment|$fws))";


		#
		# these are the rules for removing *nested* comments. we'll just detect
		# outer comment and replace it with an empty comment, and recurse until
		# we stop.
		#

		$outer_ccontent_dull	= "(?:$fws?$ctext|$quoted_pair)";
		$outer_ccontent_nest	= "(?:$fws?$comment)";
		$outer_comment		= "(?:\\x28$outer_ccontent_dull*(?:$outer_ccontent_nest$outer_ccontent_dull*)+$fws?\\x29)";


		####################################################################################
		#
		# atext           =       ALPHA / DIGIT / ; Any character except controls,
		#                         "!" / "#" /     ;  SP, and specials.
		#                         "$" / "%" /     ;  Used for atoms
		#                         "&" / "'" /
		#                         "*" / "+" /
		#                         "-" / "/" /
		#                         "=" / "?" /
		#                         "^" / "_" /
		#                         "`" / "{" /
		#                         "|" / "}" /
		#                         "~"
		# atom            =       [CFWS] 1*atext [CFWS]

		$atext		= "(?:$alpha|$digit|[\\x21\\x23-\\x27\\x2a\\x2b\\x2d\\x2f\\x3d\\x3f\\x5e\\x5f\\x60\\x7b-\\x7e])";
		$atom		= "(?:$cfws?(?:$atext)+$cfws?)";


		####################################################################################
		#
		# qtext           =       NO-WS-CTL /     ; Non white space controls
		#                         %d33 /          ; The rest of the US-ASCII
		#                         %d35-91 /       ;  characters not including "\"
		#                         %d93-126        ;  or the quote character
		# qcontent        =       qtext / quoted-pair
		# quoted-string   =       [CFWS]
		#                         DQUOTE *([FWS] qcontent) [FWS] DQUOTE
		#                         [CFWS]
		# word            =       atom / quoted-string

		$qtext		= "(?:$no_ws_ctl|[\\x21\\x23-\\x5b\\x5d-\\x7e])";
		$qcontent	= "(?:$qtext|$quoted_pair)";
		$quoted_string	= "(?:$cfws?\\x22(?:$fws?$qcontent)*$fws?\\x22$cfws?)";

		#
		# changed the '*' to a '+' to require that quoted strings are not empty.
		# this deliberately overrides the literal translation above.
		#

		$quoted_string	= "(?:$cfws?\\x22(?:$fws?$qcontent)+$fws?\\x22$cfws?)";
		$word		= "(?:$atom|$quoted_string)";


		####################################################################################
		#
		# obs-local-part  =       word *("." word)
		# obs-domain      =       atom *("." atom)

		$obs_local_part	= "(?:$word(?:\\x2e$word)*)";
		$obs_domain	= "(?:$atom(?:\\x2e$atom)*)";


		####################################################################################
		#
		# dot-atom-text   =       1*atext *("." 1*atext)
		# dot-atom        =       [CFWS] dot-atom-text [CFWS]

		$dot_atom_text	= "(?:$atext+(?:\\x2e$atext+)*)";
		$dot_atom	= "(?:$cfws?$dot_atom_text$cfws?)";


		####################################################################################
		#
		# domain-literal  =       [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS]
		# dcontent        =       dtext / quoted-pair
		# dtext           =       NO-WS-CTL /     ; Non white space controls
		#
		#                         %d33-90 /       ; The rest of the US-ASCII
		#                         %d94-126        ;  characters not including "[",
		#                                         ;  "]", or "\"

		$dtext		= "(?:$no_ws_ctl|[\\x21-\\x5a\\x5e-\\x7e])";
		$dcontent	= "(?:$dtext|$quoted_pair)";
		$domain_literal	= "(?:$cfws?\\x5b(?:$fws?$dcontent)*$fws?\\x5d$cfws?)";


		####################################################################################
		#
		# local-part      =       dot-atom / quoted-string / obs-local-part
		# domain          =       dot-atom / domain-literal / obs-domain
		# addr-spec       =       local-part "@" domain

		$local_part	= "(($dot_atom)|($quoted_string)|($obs_local_part))";
		$domain		= "(($dot_atom)|($domain_literal)|($obs_domain))";
		$addr_spec	= "$local_part\\x40$domain";



		#
		# this was previously 256 based on RFC3696, but dominic's errata was accepted.
		#

		if (strlen($email) > 254) return 0;


		#
		# we need to strip nested comments first - we replace them with a simple comment
		#

		if ($options['allow_comments']){

			$email = email_strip_comments($outer_comment, $email, "(x)");
		}


		#
		# now match what's left.
		#
		# the 'D' modifier makes '$' match only at the very end of the subject.
		# without it, '$' also matches just before a final newline, so an
		# address followed by "\n" would be accepted.
		#

		if (!preg_match("!^$addr_spec$!D", $email, $m)){

			return 0;
		}

		# capture groups 1-4 belong to $local_part, 5-8 to $domain

		$bits = array(
			'local'			=> isset($m[1]) ? $m[1] : '',
			'local-atom'		=> isset($m[2]) ? $m[2] : '',
			'local-quoted'		=> isset($m[3]) ? $m[3] : '',
			'local-obs'		=> isset($m[4]) ? $m[4] : '',
			'domain'		=> isset($m[5]) ? $m[5] : '',
			'domain-atom'		=> isset($m[6]) ? $m[6] : '',
			'domain-literal'	=> isset($m[7]) ? $m[7] : '',
			'domain-obs'		=> isset($m[8]) ? $m[8] : '',
		);


		#
		# we need to now strip comments from $bits[local] and $bits[domain],
		# since we know they're in the right place and we want them out of the
		# way for checking IPs, label sizes, etc
		#

		if ($options['allow_comments']){
			$bits['local']	= email_strip_comments($comment, $bits['local']);
			$bits['domain']	= email_strip_comments($comment, $bits['domain']);
		}


		#
		# length limits on segments
		#

		if (strlen($bits['local']) > 64) return 0;
		if (strlen($bits['domain']) > 255) return 0;


		#
		# restrictions on domain-literals from RFC2821 section 4.1.3
		#
		# RFC4291 changed the meaning of :: in IPv6 addresses - it can mean one or
		# more zero groups (updated from 2 or more).
		#

		if (strlen($bits['domain-literal'])){

			$Snum			= "(\d{1,3})";
			$IPv4_address_literal	= "$Snum\.$Snum\.$Snum\.$Snum";

			$IPv6_hex		= "(?:[0-9a-fA-F]{1,4})";

			$IPv6_full		= "IPv6\:$IPv6_hex(?:\:$IPv6_hex){7}";

			$IPv6_comp_part		= "(?:$IPv6_hex(?:\:$IPv6_hex){0,7})?";
			$IPv6_comp		= "IPv6\:($IPv6_comp_part\:\:$IPv6_comp_part)";

			$IPv6v4_full		= "IPv6\:$IPv6_hex(?:\:$IPv6_hex){5}\:$IPv4_address_literal";

			$IPv6v4_comp_part	= "$IPv6_hex(?:\:$IPv6_hex){0,5}";
			$IPv6v4_comp		= "IPv6\:((?:$IPv6v4_comp_part)?\:\:(?:$IPv6v4_comp_part\:)?)$IPv4_address_literal";


			#
			# IPv4 is simple
			#

			if (preg_match("!^\[$IPv4_address_literal\]$!", $bits['domain'], $m)){

				if (intval($m[1]) > 255) return 0;
				if (intval($m[2]) > 255) return 0;
				if (intval($m[3]) > 255) return 0;
				if (intval($m[4]) > 255) return 0;

			}else{

				#
				# this should be IPv6 - a bunch of tests are needed here :)
				# the loop runs at most once; 'break' means "accepted",
				# falling off the end means "no form matched".
				#

				while (1){

					if (preg_match("!^\[$IPv6_full\]$!", $bits['domain'])){
						break;
					}

					if (preg_match("!^\[$IPv6_comp\]$!", $bits['domain'], $m)){
						list($a, $b) = explode('::', $m[1]);
						$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
						$groups = explode(':', $folded);
						if (count($groups) > 7) return 0;
						break;
					}

					if (preg_match("!^\[$IPv6v4_full\]$!", $bits['domain'], $m)){

						if (intval($m[1]) > 255) return 0;
						if (intval($m[2]) > 255) return 0;
						if (intval($m[3]) > 255) return 0;
						if (intval($m[4]) > 255) return 0;
						break;
					}

					if (preg_match("!^\[$IPv6v4_comp\]$!", $bits['domain'], $m)){
						list($a, $b) = explode('::', $m[1]);
						$b = substr($b, 0, -1); # remove the trailing colon before the IPv4 address
						$folded = (strlen($a) && strlen($b)) ? "$a:$b" : "$a$b";
						$groups = explode(':', $folded);
						if (count($groups) > 5) return 0;

						#
						# capture 1 is the compressed IPv6 part, so the four
						# IPv4 octets are captures 2-5. they need the same
						# range check as the other IPv4 forms above.
						#

						for ($i = 2; $i <= 5; $i++){
							if (intval($m[$i]) > 255) return 0;
						}
						break;
					}

					return 0;
				}
			}
		}else{

			#
			# the domain is either dot-atom or obs-domain - either way, it's
			# made up of simple labels and we split on dots
			#

			$labels = explode('.', $bits['domain']);


			#
			# this is allowed by both dot-atom and obs-domain, but is un-routeable on the
			# public internet, so we'll fail it (e.g. user@localhost)
			#

			if ($options['public_internet']){
				if (count($labels) == 1) return 0;
			}


			#
			# checks on each label
			#

			foreach ($labels as $label){

				if (strlen($label) > 63) return 0;
				if (substr($label, 0, 1) == '-') return 0;
				if (substr($label, -1) == '-') return 0;
			}


			#
			# last label can't be all numeric
			#

			if ($options['public_internet']){
				if (preg_match('!^[0-9]+$!', array_pop($labels))) return 0;
			}
		}


		return 1;
	}

	##################################################################################

	#
	# Repeatedly replaces every match of the regex fragment $comment in $email
	# with $replace, until a pass no longer changes the string's length.
	#
	# $comment - regex fragment (no delimiters; it is wrapped in '!' here).
	# $email   - the string to strip.
	# $replace - replacement text for each match (default: empty string).
	#
	# Returns the string as it stood before the first pass that left its
	# length unchanged.
	#

	function email_strip_comments($comment, $email, $replace=''){

		while (1){
			$new = preg_replace("!$comment!", $replace, $email);
			if (strlen($new) == strlen($email)){
				return $email;
			}
			$email = $new;
		}
	}

	##################################################################################
?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Jan 29 16:31:14 2012 | Cross-referenced by PHPXref 0.7.1 |