Changeset 10340
- Timestamp:
- Apr 12, 2011, 9:46:36 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/functions_search.inc.php
r8728 r10340 266 266 } 267 267 268 269 if (function_exists('mb_strtolower')) 270 { 271 function transliterate($term) 272 { 273 return remove_accents( mb_strtolower($term) ); 274 } 275 } 276 else 277 { 278 function transliterate($term) 279 { 280 return remove_accents( strtolower($term) ); 281 } 282 } 283 284 function is_word_char($ch) 285 { 286 return ($ch>='0' && $ch<='9') || ($ch>='a' && $ch<='z') || ($ch>='A' && $ch<='Z') || ord($ch)>127; 287 } 288 268 289 /** 269 * returns the LIKE sql clause corresponding to the quick search query $q 270 * and the field $field. example q='john bill', field='file' will return 271 * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full 272 * text search (+,<,>,~) are omitted. The query can contain a phrase: 273 * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'. 274 * @param string q 275 * @param string field 276 * @return string 290 * analyzes and splits the quick/query search query $q into tokens 291 * q='john bill' => 2 tokens 'john' 'bill' 292 * Special characters for MySql full text search (+,<,>,~) appear in the token modifiers. 293 * The query can contain a phrase: 'Pierre "New York"' will return 'pierre' qnd 'new york'. 277 294 */ 278 function get_qsearch_like_clause($q, $field, $before='%', $after='%')295 function analyse_qsearch($q, &$qtokens, &$qtoken_modifiers) 279 296 { 280 297 $q = stripslashes($q); … … 293 310 if ($ch=='"') 294 311 { 312 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 313 $crt_token = ""; $crt_token_modifier = "q"; 314 $state=1; 315 } 316 elseif ( $ch=='*' ) 317 { // wild card 295 318 if (strlen($crt_token)) 296 319 { 297 $tokens[] = $crt_token; 298 $token_modifiers[] = $crt_token_modifier; 299 $crt_token = ""; 300 $crt_token_modifier = ""; 320 $crt_token .= $ch; 301 321 } 302 $state=1; 303 } 304 elseif ( $ch=='*' ) 305 { // wild card 306 $crt_token .= '%'; 322 else 323 { 324 $crt_token_modifier .= '*'; 325 } 307 326 } 308 327 elseif ( strcspn($ch, '+-><~')==0 ) … … 310 329 if (strlen($crt_token)) 311 330 { 312 $tokens[] = $crt_token; 313 $token_modifiers[] = $crt_token_modifier; 314 $crt_token = ""; 315 $crt_token_modifier = ""; 331 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 332 $crt_token = ""; $crt_token_modifier = ""; 316 333 } 317 334 $crt_token_modifier .= $ch; … … 321 338 if (strlen($crt_token)) 322 339 { 323 $tokens[] = $crt_token; 324 $token_modifiers[] = $crt_token_modifier; 325 $crt_token = ""; 326 $crt_token_modifier = ""; 340 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 341 $crt_token = ""; $crt_token_modifier = ""; 327 342 } 328 343 } 329 344 else 330 345 { 331 if ( strcspn($ch, '%_')==0)332 {// escape LIKE specials %_333 $ch = '\\'.$ch;334 }335 346 $crt_token .= $ch; 336 347 } … … 340 351 { 341 352 case '"': 342 $tokens[] = $crt_token; 343 $token_modifiers[] = $crt_token_modifier; 344 $crt_token = ""; 345 $crt_token_modifier = ""; 353 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 354 $crt_token = ""; $crt_token_modifier = ""; 346 355 $state=0; 347 356 break; 348 357 default: 349 if ( strcspn($ch, '%_')==0)350 {// escape LIKE specials %_351 $ch = '\\'.$ch;352 }353 358 $crt_token .= $ch; 354 359 } … … 362 367 } 363 368 369 $qtokens = array(); 370 $qtoken_modifiers = array(); 371 for ($i=0; $i<count($tokens); $i++) 372 { 373 if (strstr($token_modifiers[$i], 'q')===false) 374 { 375 if ( substr($tokens[$i], -1)=='*' ) 376 { 377 $tokens[$i] = rtrim($tokens[$i], '*'); 378 $token_modifiers[$i] .= '*'; 379 } 380 } 381 if ( strlen($tokens[$i])==0) 382 continue; 383 $qtokens[] = $tokens[$i]; 384 $qtoken_modifiers[] = $token_modifiers[$i]; 385 } 386 } 387 388 389 /** 390 * returns the LIKE sql clause corresponding to the quick search query 391 * that has been split into tokens 392 * for example file LIKE '%john%' OR file LIKE '%bill%'. 393 */ 394 function get_qsearch_like_clause($tokens, $token_modifiers, $field) 395 { 364 396 $clauses = array(); 365 397 for ($i=0; $i<count($tokens); $i++) 366 398 { 367 $token s[$i]= trim($tokens[$i], '%');399 $token = trim($tokens[$i], '%'); 368 400 if (strstr($token_modifiers[$i], '-')!==false) 369 401 continue; 370 if ( strlen($token s[$i])==0)402 if ( strlen($token==0) ) 371 403 continue; 372 $clauses[] = $field.' LIKE \''.$before.addslashes($tokens[$i]).$after.'\''; 404 $token = addslashes($token); 405 $token = str_replace( array('%','_'), array('\\%','\\_'), $token); // escape LIKE specials %_ 406 $clauses[] = $field.' LIKE \'%'.$token.'%\''; 373 407 } 374 408 375 409 return count($clauses) ? '('.implode(' OR ', $clauses).')' : null; 376 410 } 377 378 411 379 412 /** … … 396 429 function get_quick_search_results($q, $super_order_by, $images_where='') 397 430 { 431 global $user, $conf; 432 398 433 $search_results = 399 434 array( … … 406 441 return $search_results; 407 442 } 443 444 analyse_qsearch($q, $tokens, $token_modifiers); 445 408 446 $q_like_field = '@@__db_field__@@'; //something never in a search 409 $q_like_clause = get_qsearch_like_clause($q, $q_like_field ); 410 447 $q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field ); 411 448 412 449 // Step 1 - first we find matches in #images table =========================== … … 449 486 450 487 // Step 2 - search tags corresponding to the query $q ======================== 451 if (!empty($q_like_clause)) 452 { // search name and url name (without accents) 453 $query = ' 454 SELECT id, name, url_name 488 $transliterated_tokens = array(); 489 $token_tags = array(); 490 foreach ($tokens as $token) 491 { 492 $transliterated_tokens[] = transliterate($token); 493 $token_tags[] = array(); 494 } 495 496 // Step 2.1 - find match tags for every token in the query search 497 $all_tags = array(); 498 $query = ' 499 SELECT id, name, url_name, COUNT(image_id) AS nb_images 455 500 FROM '.TAGS_TABLE.' 456 WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).' 457 OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')'; 458 $tags = hash_from_query($query, 'id'); 459 if ( !empty($tags) ) 460 { // we got some tags; get the images 461 $search_results['qs']['matching_tags']=$tags; 501 INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id 502 GROUP BY id'; 503 $result = pwg_query($query); 504 while ($tag = pwg_db_fetch_assoc($result)) 505 { 506 $transliterated_tag = transliterate($tag['name']); 507 508 // find how this tag matches query tokens 509 for ($i=0; $i<count($tokens); $i++) 510 { 511 if (strstr($token_modifiers[$i], '-')!==false) 512 continue;// ignore this NOT token 513 $transliterated_token = $transliterated_tokens[$i]; 514 515 $match = false; 516 $pos = 0; 517 while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false) 518 { 519 if (strstr($token_modifiers[$i], '*')!==false) 520 {// wildcard in this token 521 $match = 1; 522 break; 523 } 524 $token_len = strlen($transliterated_token); 525 526 $word_begin = $pos; 527 while ($word_begin>0) 528 { 529 if (! is_word_char($transliterated_tag[$word_begin-1]) ) 530 break; 531 $word_begin--; 532 } 533 534 $word_end = $pos + $token_len; 535 while ($word_end<strlen($transliterated_tag) && is_word_char($transliterated_tag[$word_end]) ) 536 $word_end++; 537 538 $this_score = $token_len / ($word_end-$word_begin); 539 if ($token_len <= 2) 540 {// search for 1 or 2 characters must match exactly to avoid retrieving too much data 541 if ($token_len != $word_end-$word_begin) 542 $this_score = 0; 543 } 544 elseif ($token_len == 3) 545 { 546 if ($word_end-$word_begin > 4) 547 $this_score = 0; 548 } 549 550 if ($this_score>0) 551 $match = max($match, $this_score ); 552 $pos++; 553 } 554 555 if ($match) 556 { 557 $tag_id = (int)$tag['id']; 558 $all_tags[$tag_id] = $tag; 559 $token_tags[$i][] = array('tag_id'=>$tag_id, 'score'=>$match); 560 } 561 } 562 } 563 $search_results['qs']['matching_tags']=$all_tags; 564 565 // Step 2.2 - reduce matching tags for every token in the query search 566 $score_cmp_fn = create_function('$a,$b', 'return 100*($b["score"]-$a["score"]);'); 567 foreach ($token_tags as &$tt) 568 { 569 usort($tt, $score_cmp_fn); 570 $nb_images = 0; 571 $prev_score = 0; 572 for ($j=0; $j<count($tt); $j++) 573 { 574 if ($nb_images > 200 && $prev_score > $tt[$j]['score'] ) 575 {// "many" images in previous tags and starting from this tag is less relevent 576 $tt = array_slice( $tt, 0, $j); 577 break; 578 } 579 $nb_images += $all_tags[ $tt[$j]['tag_id'] ]['nb_images']; 580 $prev_score = $tt[$j]['score']; 581 } 582 } 583 584 // Step 2.3 - get the images for tags 585 for ($i=0; $i<count($token_tags); $i++) 586 { 587 $tag_ids = array(); 588 foreach($token_tags[$i] as $arr) 589 $tag_ids[] = $arr['tag_id']; 590 591 if (!empty($tag_ids)) 592 { 462 593 $query = ' 463 SELECT image_id , COUNT(tag_id) AS weight594 SELECT image_id 464 595 FROM '.IMAGE_TAG_TABLE.' 465 WHERE tag_id IN ('.implode(',', array_keys($tags)).')596 WHERE tag_id IN ('.implode(',',$tag_ids).') 466 597 GROUP BY image_id'; 467 598 $result = pwg_query($query); … … 469 600 { // weight is important when sorting images by relevance 470 601 $image_id=(int)$row['image_id']; 471 @$by_weights[$image_id] += $row['weight'];602 @$by_weights[$image_id] += 1; 472 603 } 473 604 } 474 605 } 475 606 476 477 607 // Step 3 - search categories corresponding to the query $q ================== 478 global $user;479 608 $query = ' 480 609 SELECT id, name, permalink, nb_images … … 532 661 ); 533 662 534 global $conf;535 663 $query = ' 536 664 SELECT DISTINCT(id)
Note: See TracChangeset
for help on using the changeset viewer.