Changeset 18636 for trunk/include/functions_search.inc.php
- Timestamp: Oct 13, 2012, 5:40:14 PM (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/functions_search.inc.php
r18207 r18636 306 306 { 307 307 $ch = $q[$i]; 308 if ( $crt_token_modifier&QST_QUOTED==0)308 if ( ($crt_token_modifier&QST_QUOTED)==0) 309 309 { 310 310 if ($ch=='"') … … 357 357 $crt_token = ""; $crt_token_modifier = 0; 358 358 $state=0; 359 break;360 359 } 361 360 else … … 363 362 } 364 363 } 364 365 365 if (strlen($crt_token)) 366 366 { … … 390 390 391 391 /** 392 * returns the LIKE sql clause corresponding to the quick search query 392 * returns the LIKE sql clause corresponding to the quick search query 393 393 * that has been split into tokens 394 394 * for example file LIKE '%john%' OR file LIKE '%bill%'. … … 410 410 411 411 return count($clauses) ? '('.implode(' OR ', $clauses).')' : null; 412 } 413 414 /** 415 */ 416 function get_qsearch_tags($tokens, $token_modifiers, &$token_tag_ids, &$not_tag_ids, &$all_tags) 417 { 418 $token_tag_ids = array_fill(0, count($tokens), array() ); 419 $not_tag_ids = $all_tags = array(); 420 421 $token_tag_scores = $token_tag_ids; 422 $transliterated_tokens = array(); 423 foreach ($tokens as $token) 424 { 425 $transliterated_tokens[] = transliterate($token); 426 } 427 428 $query = ' 429 SELECT t.*, COUNT(image_id) AS counter 430 FROM '.TAGS_TABLE.' t 431 INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id 432 GROUP BY id'; 433 $result = pwg_query($query); 434 while ($tag = pwg_db_fetch_assoc($result)) 435 { 436 $transliterated_tag = transliterate($tag['name']); 437 438 // find how this tag matches query tokens 439 for ($i=0; $i<count($tokens); $i++) 440 { 441 $transliterated_token = $transliterated_tokens[$i]; 442 443 $match = false; 444 $pos = 0; 445 while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false) 446 { 447 if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD ) 448 {// wildcard in this token 449 $match = 1; 450 break; 451 } 452 $token_len = strlen($transliterated_token); 453 454 // search begin of word 455 $wbegin_len=0; $wbegin_char=' '; 456 while ($pos-$wbegin_len > 0) 457 { 458 if (! 
is_word_char($transliterated_tag[$pos-$wbegin_len-1]) ) 459 { 460 $wbegin_char = $transliterated_tag[$pos-$wbegin_len-1]; 461 break; 462 } 463 $wbegin_len++; 464 } 465 466 // search end of word 467 $wend_len=0; $wend_char=' '; 468 while ($pos+$token_len+$wend_len < strlen($transliterated_tag)) 469 { 470 if (! is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) ) 471 { 472 $wend_char = $transliterated_tag[$pos+$token_len+$wend_len]; 473 break; 474 } 475 $wend_len++; 476 } 477 478 $this_score = 0; 479 if ( ($token_modifiers[$i]&QST_WILDCARD)==0 ) 480 {// no wildcard begin or end 481 if ($token_len <= 2) 482 {// search for 1 or 2 characters must match exactly to avoid retrieving too much data 483 if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) ) 484 $this_score = 1; 485 } 486 elseif ($token_len == 3) 487 { 488 if ($wbegin_len==0) 489 $this_score = $token_len / ($token_len + $wend_len); 490 } 491 else 492 { 493 $this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len); 494 } 495 } 496 497 if ($this_score>0) 498 $match = max($match, $this_score ); 499 $pos++; 500 } 501 502 if ($match) 503 { 504 $tag_id = (int)$tag['id']; 505 $all_tags[$tag_id] = $tag; 506 $token_tag_ids[$i][] = $tag_id; 507 $token_tag_scores[$i][] = $match; 508 } 509 } 510 } 511 512 // process not tags 513 for ($i=0; $i<count($tokens); $i++) 514 { 515 if ( ! 
($token_modifiers[$i]&QST_NOT) ) 516 continue; 517 518 array_multisort($token_tag_scores[$i], SORT_DESC|SORT_NUMERIC, $token_tag_ids[$i]); 519 520 for ($j=0; $j<count($token_tag_scores[$i]); $j++) 521 { 522 if ($token_tag_scores[$i][$j] < 0.8) 523 break; 524 if ($j>0 && $token_tag_scores[$i][$j] < $token_tag_scores[$i][0]) 525 break; 526 $tag_id = $token_tag_ids[$i][$j]; 527 if ( isset($all_tags[$tag_id]) ) 528 { 529 unset($all_tags[$tag_id]); 530 $not_tag_ids[] = $tag_id; 531 } 532 } 533 $token_tag_ids[$i] = array(); 534 } 535 536 // process regular tags 537 for ($i=0; $i<count($tokens); $i++) 538 { 539 if ( $token_modifiers[$i]&QST_NOT ) 540 continue; 541 542 array_multisort($token_tag_scores[$i], SORT_DESC|SORT_NUMERIC, $token_tag_ids[$i]); 543 544 $counter = 0; 545 for ($j=0; $j<count($token_tag_scores[$i]); $j++) 546 { 547 $tag_id = $token_tag_ids[$i][$j]; 548 if ( ! isset($all_tags[$tag_id]) ) 549 { 550 array_splice($token_tag_ids[$i], $j, 1); 551 array_splice($token_tag_scores[$i], $j, 1); 552 } 553 554 $counter += $all_tags[$tag_id]['counter']; 555 if ($counter > 200 && $j>0 && $token_tag_scores[$i][0] > $token_tag_scores[$i][$j] ) 556 {// "many" images in previous tags and starting from this tag is less relevent 557 array_splice($token_tag_ids[$i], $j); 558 array_splice($token_tag_scores[$i], $j); 559 break; 560 } 561 } 562 } 563 564 usort($all_tags, 'tag_alpha_compare'); 565 foreach ( $all_tags as &$tag ) 566 $tag['name'] = trigger_event('render_tag_name', $tag['name']); 412 567 } 413 568 … … 445 600 } 446 601 $debug[] = '<!--'.count($tokens).' tokens'; 447 602 448 603 $q_like_field = '@@__db_field__@@'; //something never in a search 449 604 $q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field ); … … 486 641 } 487 642 $debug[] = count($by_weights).' fulltext'; 488 $debug[] = 'ft score min:'.min($by_weights).' 
max:'.max($by_weights); 489 490 491 // Step 2 - search tags corresponding to the query $q ======================== 492 $transliterated_tokens = array(); 493 $token_tags = array(); 494 foreach ($tokens as $token) 495 { 496 $transliterated_tokens[] = transliterate($token); 497 $token_tags[] = array(); 498 } 499 500 // Step 2.1 - find match tags for every token in the query search 501 $all_tags = array(); 502 $query = ' 503 SELECT id, name, url_name, COUNT(image_id) AS nb_images 504 FROM '.TAGS_TABLE.' 505 INNER JOIN '.IMAGE_TAG_TABLE.' ON id=tag_id 506 GROUP BY id'; 507 $result = pwg_query($query); 508 while ($tag = pwg_db_fetch_assoc($result)) 509 { 510 $transliterated_tag = transliterate($tag['name']); 511 512 // find how this tag matches query tokens 513 for ($i=0; $i<count($tokens); $i++) 514 { 515 if ($token_modifiers[$i]&QST_NOT) 516 continue;// ignore this NOT token 517 $transliterated_token = $transliterated_tokens[$i]; 518 519 $match = false; 520 $pos = 0; 521 while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false) 522 { 523 if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD ) 524 {// wildcard in this token 525 $match = 1; 526 break; 527 } 528 $token_len = strlen($transliterated_token); 529 530 // search begin of word 531 $wbegin_len=0; $wbegin_char=' '; 532 while ($pos-$wbegin_len > 0) 533 { 534 if (! is_word_char($transliterated_tag[$pos-$wbegin_len-1]) ) 535 { 536 $wbegin_char = $transliterated_tag[$pos-$wbegin_len-1]; 537 break; 538 } 539 $wbegin_len++; 540 } 541 542 // search end of word 543 $wend_len=0; $wend_char=' '; 544 while ($pos+$token_len+$wend_len < strlen($transliterated_tag)) 545 { 546 if (! 
is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) ) 547 { 548 $wend_char = $transliterated_tag[$pos+$token_len+$wend_len]; 549 break; 550 } 551 $wend_len++; 552 } 553 554 $this_score = 0; 555 if ( ($token_modifiers[$i]&QST_WILDCARD)==0 ) 556 {// no wildcard begin or end 557 if ($token_len <= 2) 558 {// search for 1 or 2 characters must match exactly to avoid retrieving too much data 559 if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) ) 560 $this_score = 1; 561 } 562 elseif ($token_len == 3) 563 { 564 if ($wbegin_len==0) 565 $this_score = $token_len / ($token_len + $wend_len); 566 } 567 else 568 { 569 $this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len); 570 } 571 } 572 573 if ($this_score>0) 574 $match = max($match, $this_score ); 575 $pos++; 576 } 577 578 if ($match) 579 { 580 $tag_id = (int)$tag['id']; 581 $all_tags[$tag_id] = $tag; 582 $token_tags[$i][] = array('tag_id'=>$tag_id, 'score'=>$match); 583 } 584 } 585 } 586 $search_results['qs']['matching_tags']=$all_tags; 587 $debug[] = count($all_tags).' 
tags'; 588 589 // Step 2.2 - reduce matching tags for every token in the query search 590 $score_cmp_fn = create_function('$a,$b', 'return 100*($b["score"]-$a["score"]);'); 591 foreach ($token_tags as &$tt) 592 { 593 usort($tt, $score_cmp_fn); 594 $nb_images = 0; 595 $prev_score = 0; 596 for ($j=0; $j<count($tt); $j++) 597 { 598 if ($nb_images > 200 && $prev_score > $tt[$j]['score'] ) 599 {// "many" images in previous tags and starting from this tag is less relevent 600 $tt = array_slice( $tt, 0, $j); 601 break; 602 } 603 $nb_images += $all_tags[ $tt[$j]['tag_id'] ]['nb_images']; 604 $prev_score = $tt[$j]['score']; 605 } 606 } 607 608 // Step 2.3 - get the images for tags 609 for ($i=0; $i<count($token_tags); $i++) 610 { 611 $tag_ids = array(); 612 foreach($token_tags[$i] as $arr) 613 $tag_ids[] = $arr['tag_id']; 614 $tag_ids = array_unique($tag_ids); 643 if (!empty($by_weights)) 644 { 645 $debug[] = 'ft score min:'.min($by_weights).' max:'.max($by_weights); 646 } 647 648 649 // Step 2 - get the tags and the images for tags 650 get_qsearch_tags($tokens, $token_modifiers, $token_tag_ids, $not_tag_ids, $search_results['qs']['matching_tags']); 651 $debug[] = count($search_results['qs']['matching_tags']).' tags'; 652 653 for ($i=0; $i<count($token_tag_ids); $i++) 654 { 655 $tag_ids = $token_tag_ids[$i]; 615 656 $debug[] = count($tag_ids).' unique tags'; 616 657 … … 619 660 $tag_photo_count=0; 620 661 $query = ' 621 SELECT image_id 622 FROM '.IMAGE_TAG_TABLE.' 662 SELECT image_id FROM '.IMAGE_TAG_TABLE.' 623 663 WHERE tag_id IN ('.implode(',',$tag_ids).') 624 664 GROUP BY image_id'; … … 630 670 $tag_photo_count++; 631 671 } 632 $debug[] = $tag_photo_count.' photos for tag s';633 $debug[] = count($by_weights).' photos after tags';672 $debug[] = $tag_photo_count.' photos for tag'; 673 $debug[] = count($by_weights).' photos after'; 634 674 } 635 675 } … … 664 704 } 665 705 706 if (!empty($not_tag_ids)) 707 { 708 $query = ' 709 SELECT image_id FROM '.IMAGE_TAG_TABLE.' 
710 WHERE tag_id IN ('.implode(',',$not_tag_ids).') 711 GROUP BY image_id'; 712 $result = pwg_query($query); 713 while ($row = pwg_db_fetch_row($result)) 714 { 715 $id = $row[0]; 716 unset($by_weights[$id]); 717 } 718 $debug[] = count($by_weights).' after not tags'; 719 } 666 720 // Step 4 - now we have $by_weights ( array image id => weight ) that need 667 721 // permission checks and/or matching categories to get images from
Note: See TracChangeset for help on using the changeset viewer.