Changeset 17749
- Timestamp: Sep 4, 2012, 10:04:34 PM (12 years ago)
- Location: branches/2.4
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2.4/include/functions.inc.php
r15598 r17749 204 204 * @param string Str 205 205 */ 206 function seems_utf8($Str) { # by bmorel at ssi dot fr 206 function seems_utf8($Str) { 207 // OBSOLETE !!! 208 return qualify_utf8($Str) >= 0; 209 } 210 211 /* returns 0 if $str is Ascii, 1 if utf-8, -1 otherwise */ 212 function qualify_utf8($Str) 213 { 214 $ret = 0; 207 215 for ($i=0; $i<strlen($Str); $i++) { 208 216 if (ord($Str[$i]) < 0x80) continue; # 0bbbbbbb 209 elseif ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb 217 $ret = 1; 218 if ((ord($Str[$i]) & 0xE0) == 0xC0) $n=1; # 110bbbbb 210 219 elseif ((ord($Str[$i]) & 0xF0) == 0xE0) $n=2; # 1110bbbb 211 220 elseif ((ord($Str[$i]) & 0xF8) == 0xF0) $n=3; # 11110bbb 212 221 elseif ((ord($Str[$i]) & 0xFC) == 0xF8) $n=4; # 111110bb 213 222 elseif ((ord($Str[$i]) & 0xFE) == 0xFC) $n=5; # 1111110b 214 else return false; # Does not match any model 223 else return -1; # Does not match any model 215 224 for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? 216 225 if ((++$i == strlen($Str)) || ((ord($Str[$i]) & 0xC0) != 0x80)) 217 return false; 218 } 219 } 220 return true; 226 return -1; 227 } 228 } 229 return $ret; 221 230 } 222 231 … … 226 235 function remove_accents($string) 227 236 { 228 if ( !preg_match('/[\x80-\xff]/', $string) ) 229 return $string; 230 231 if (seems_utf8($string)) { 237 $utf = qualify_utf8($string); 238 if ( $utf == 0 ) 239 return $string; // ascii 240 241 if ( $utf > 0 ) { 232 242 $chars = array( 233 243 // Decompositions for Latin-1 Supplement … … 324 334 "\xc5\xbc"=>'z', "\xc5\xbd"=>'Z', 325 335 "\xc5\xbe"=>'z', "\xc5\xbf"=>'s', 336 // Decompositions for Latin Extended-B 337 "\xc8\x98"=>'S', "\xc8\x99"=>'s', 338 "\xc8\x9a"=>'T', "\xc8\x9b"=>'t', 326 339 // Euro Sign 327 340 "\xe2\x82\xac"=>'E', … … 354 367 } 355 368 369 if (function_exists('mb_strtolower') && defined('PWG_CHARSET')) 370 { 371 function transliterate($term) 372 { 373 return remove_accents( mb_strtolower($term, PWG_CHARSET) ); 374 } 375 } 376 else 377 { 378 function transliterate($term) 379 { 380 return remove_accents( strtolower($term) ); 381 } 382 } 383 384 385 356 386 /** 357 387 * simplify a string to insert it into an URL … … 362 392 function str2url($str) 363 393 { 364 $raw = $str; 365 366 $str = remove_accents($str); 367 $str = preg_replace('/[^a-z0-9_\s\'\:\/\[\],-]/','',strtolower($str)); 394 $str = $safe = transliterate($str); 395 $str = preg_replace('/[^\x80-\xffa-z0-9_\s\'\:\/\[\],-]/','',$str); 368 396 $str = preg_replace('/[\s\'\:\/\[\],-]+/',' ',trim($str)); 369 397 $res = str_replace(' ','_',$str); … … 371 399 if (empty($res)) 372 400 { 373 $res = str_replace(' ','_', $raw); 401 $res = str_replace(' ','_', $safe); 374 402 } 375 403 -
branches/2.4/include/functions_html.inc.php
r15384 r17749 301 301 if (!isset($cache[__FUNCTION__][ $tag['name'] ])) 302 302 { 303 $cache[__FUNCTION__][ $tag['name'] ] = strtolower(str2url($tag['name'])); 303 $cache[__FUNCTION__][ $tag['name'] ] = transliterate($tag['name']); 304 304 } 305 305 } -
branches/2.4/include/functions_metadata.inc.php
r12922 r17749 91 91 // how to detect it so a plugin should do the trick. 92 92 $value = trigger_event('clean_iptc_value', $value); 93 $is_utf8 = seems_utf8($value); 94 $value = convert_charset( $value, 95 $is_utf8 ? 'utf-8' : 'iso-8859-1', 96 get_pwg_charset() ); 93 if ( ($qual = qualify_utf8($value)) != 0) 94 {// has non ascii chars 95 $value = convert_charset( $value, 96 $qual>0 ? 'utf-8' : 'iso-8859-1', 97 get_pwg_charset() ); 98 } 97 99 } 98 100 return $value; -
branches/2.4/include/functions_search.inc.php
r12922 r17749 267 267 268 268 269 if (function_exists('mb_strtolower')) 270 { 271 function transliterate($term) 272 { 273 return remove_accents( mb_strtolower($term) ); 274 } 275 } 276 else 277 { 278 function transliterate($term) 279 { 280 return remove_accents( strtolower($term) ); 281 } 282 } 283 284 269 function is_word_char($ch) 285 270 { -
branches/2.4/tags.php
r15578 r17749 100 100 foreach ($tags as $tag) 101 101 { 102 $tag_letter = strtoupper(mb_substr(str2url($tag['name']), 0, 1, 'utf-8')); 102 $tag_letter = mb_strtoupper(mb_substr(transliterate($tag['name']), 0, 1, PWG_CHARSET), PWG_CHARSET); 103 103 104 104 if ($current_tag_idx==0) {
Note: See TracChangeset for help on using the changeset viewer.