[ Index ] |
PHP Cross Reference of phpwcms V1.5.0 _r431 (28.01.12) |
[Summary view] [Print] [Text view]
<?php

/**
 * Checks whether a string looks like UTF-8 encoded text.
 *
 * Every byte is classified by its lead-byte pattern and the matching number
 * of continuation bytes (10bbbbbb) must follow. Only sequences of up to four
 * bytes are accepted, because UTF-8 is limited to 4 bytes per character
 * (RFC 3629). Lead bytes that can never occur in UTF-8 (0xC0, 0xC1 and
 * 0xF5-0xFF) are rejected as well.
 *
 * NOTE: This is a structural check only. It does not detect every overlong
 * encoding, nor encoded UTF-16 surrogates.
 *
 * @author bmorel at ssi dot fr (modified)
 * @package WordPress
 *
 * @param string $str The string to be checked
 * @return bool True if $str fits a UTF-8 model, false otherwise.
 */
function phpwcms_seems_utf8($str) {
    $length = strlen($str);
    for ($i = 0; $i < $length; $i++) {
        $c = ord($str[$i]);
        if ($c < 0x80) {
            $n = 0; // 0bbbbbbb - plain ASCII, no continuation byte
        } elseif ($c >= 0xC2 && $c <= 0xDF) {
            $n = 1; // 110bbbbb (0xC0/0xC1 would only encode overlong ASCII)
        } elseif (($c & 0xF0) === 0xE0) {
            $n = 2; // 1110bbbb
        } elseif ($c >= 0xF0 && $c <= 0xF4) {
            $n = 3; // 11110bbb (0xF5-0xF7 would exceed U+10FFFF)
        } else {
            return false; // stray continuation byte or invalid lead byte
        }
        // $n bytes matching 10bbbbbb have to follow the lead byte.
        for ($j = 0; $j < $n; $j++) {
            if ((++$i === $length) || ((ord($str[$i]) & 0xC0) !== 0x80)) {
                return false;
            }
        }
    }
    return true;
}

/**
 * Converts all accent characters to ASCII characters.
 *
 * If there are no bytes >= 0x80 in the string, it is returned unchanged.
 * Otherwise the string is transliterated with a UTF-8 table when
 * phpwcms_seems_utf8() accepts it, or with a single-byte table
 * (ISO-8859-1 is assumed) when it does not.
 *
 * NOTE: The two tables are not fully symmetrical, e.g. the sharp s becomes
 * 's' in the UTF-8 table but 'ss' in the single-byte table, and Eth becomes
 * 'D'/'d' versus 'DH'/'dh'.
 *
 * @package WordPress
 *
 * @param string $string Text that might have accent characters
 * @return string Filtered string with replaced "nice" characters.
 */
function phpwcms_remove_accents($string) {
    if ( !preg_match('/[\x80-\xff]/', $string) ) {
        return $string;
    }

    if (phpwcms_seems_utf8($string)) {
        $chars = array(
            // Decompositions for Latin-1 Supplement
            chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
            chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
            chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
            chr(195).chr(134) => 'AE', chr(195).chr(135) => 'C',
            chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
            chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
            chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
            chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
            chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
            chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
            chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
            chr(195).chr(150) => 'O',
            // U+00D8 (O with stroke); chr(195).chr(151) is the
            // multiplication sign and is intentionally not mapped.
            chr(195).chr(152) => 'O',
            chr(195).chr(153) => 'U',
            chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
            chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
            chr(195).chr(158) => 'TH', chr(195).chr(159) => 's',
            chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
            chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
            chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
            chr(195).chr(166) => 'ae', chr(195).chr(167) => 'c',
            chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
            chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
            chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
            chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
            chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
            chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
            chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
            chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
            chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
            chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
            chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
            chr(195).chr(191) => 'y',
            // Decompositions for Latin Extended-A
            chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
            chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
            chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
            chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
            chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
            chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
            chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
            chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
            chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
            chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
            chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
            chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
            chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
            chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
            chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
            chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
            chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
            chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
            chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
            chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
            chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
            chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
            chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
            chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
            chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
            chr(196).chr(178) => 'IJ', chr(196).chr(179) => 'ij',
            chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
            chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
            chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
            chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
            chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
            chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
            chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
            chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
            chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
            chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
            chr(197).chr(136) => 'n',
            // U+0149 is a small letter, U+014A/U+014B are capital/small Eng.
            chr(197).chr(137) => 'n',
            chr(197).chr(138) => 'N', chr(197).chr(139) => 'n',
            chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
            chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
            chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
            chr(197).chr(146) => 'OE', chr(197).chr(147) => 'oe',
            chr(197).chr(148) => 'R', chr(197).chr(149) => 'r',
            chr(197).chr(150) => 'R', chr(197).chr(151) => 'r',
            chr(197).chr(152) => 'R', chr(197).chr(153) => 'r',
            chr(197).chr(154) => 'S', chr(197).chr(155) => 's',
            chr(197).chr(156) => 'S', chr(197).chr(157) => 's',
            chr(197).chr(158) => 'S', chr(197).chr(159) => 's',
            chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
            chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
            chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
            chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
            chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
            chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
            chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
            chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
            chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
            chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
            chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
            chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
            chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
            chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
            chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
            chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
            // Decompositions for Latin Extended-B
            chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
            chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
            // Euro Sign
            chr(226).chr(130).chr(172) => 'E',
            // GBP (Pound) Sign - removed from the string
            chr(194).chr(163) => '');

        $string = strtr($string, $chars);
    } else {
        // Assume ISO-8859-1 if not UTF-8.
        // Byte-wise translation: the n-th byte of 'in' becomes the n-th
        // character of 'out', so both strings must keep the same length.
        $chars = array();
        $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
            .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
            .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
            .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
            .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
            .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
            .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
            .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
            .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
            .chr(252).chr(253).chr(255);

        $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";

        $string = strtr($string, $chars['in'], $chars['out']);

        // Bytes that expand to two ASCII characters cannot go through the
        // byte-wise strtr() above and are replaced separately.
        $double_chars = array();
        $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
        $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
        $string = str_replace($double_chars['in'], $double_chars['out'], $string);
    }

    return $string;
}

/**
 * Replaces a fixed set of accented characters, ligatures and German umlauts
 * with ASCII equivalents (umlauts become two letters, e.g. 'ü' => 'ue').
 *
 * The replacement uses the array form of strtr(), so every literal is matched
 * as a whole string. The byte-wise three-argument form would corrupt the text
 * whenever this file is stored in a multi-byte encoding, because the "from"
 * and "to" strings would then differ in byte length.
 *
 * The literals match input that uses the same encoding as this file.
 *
 * @param string $string Text that might contain accented characters
 * @return string Text with the listed characters replaced
 */
function pre_remove_accents($string) {

    $map = array(
        // Characters that expand to two ASCII letters
        'Þ' => 'TH', 'þ' => 'th', 'Ð' => 'DH', 'ð' => 'dh', 'ß' => 'ss',
        'Œ' => 'OE', 'œ' => 'oe', 'Æ' => 'AE', 'æ' => 'ae', 'µ' => 'u',
        'Ä' => 'AE', 'Ü' => 'UE', 'Ö' => 'OE', 'ä' => 'ae', 'ü' => 'ue',
        'ö' => 'oe',
        // Characters that map to a single ASCII letter
        'Š' => 'S', 'Ž' => 'Z', 'š' => 's', 'ž' => 'z', 'Ÿ' => 'Y',
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Å' => 'A',
        'Ç' => 'C',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ø' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
        'Ý' => 'Y',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'å' => 'a',
        'ç' => 'c',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ñ' => 'n',
        'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ø' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
    );

    return strtr($string, $map);
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Jan 29 16:31:14 2012 | Cross-referenced by PHPXref 0.7.1 |