- Timestamp:
- Sep 24, 2012, 10:50:24 PM (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/functions_search.inc.php
r17748 r18207 272 272 } 273 273 274 function is_odd_wbreak_begin($ch) 275 { 276 return strpos('[{<=*+', $ch)===false ? false:true; 277 } 278 279 function is_odd_wbreak_end($ch) 280 { 281 return strpos(']}>=*+', $ch)===false ? false:true; 282 } 283 284 define('QST_QUOTED', 0x01); 285 define('QST_NOT', 0x02); 286 define('QST_WILDCARD_BEGIN',0x04); 287 define('QST_WILDCARD_END', 0x08); 288 define('QST_WILDCARD', QST_WILDCARD_BEGIN|QST_WILDCARD_END); 289 290 274 291 /** 275 292 * analyzes and splits the quick/query search query $q into tokens … … 284 301 $token_modifiers = array(); 285 302 $crt_token = ""; 286 $crt_token_modifier = ""; 287 $state = 0; 303 $crt_token_modifier = 0; 288 304 289 305 for ($i=0; $i<strlen($q); $i++) 290 306 { 291 307 $ch = $q[$i]; 292 switch ($state) 293 { 294 case 0: 308 if ($crt_token_modifier&QST_QUOTED==0) 309 { 295 310 if ($ch=='"') 296 311 { 297 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 298 $crt_token = ""; $crt_token_modifier = "q"; 299 $state=1; 300 } 301 elseif ( $ch=='*' ) 302 { // wild card 312 if (strlen($crt_token)) 313 { 314 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 315 $crt_token = ""; $crt_token_modifier = 0; 316 } 317 $crt_token_modifier |= QST_QUOTED; 318 } 319 elseif ( strcspn($ch, '*+-><~')==0 ) 320 { //special full text modifier 303 321 if (strlen($crt_token)) 304 322 { … … 307 325 else 308 326 { 309 $crt_token_modifier .= '*'; 310 } 311 } 312 elseif ( strcspn($ch, '+-><~')==0 ) 313 { //special full text modifier 314 if (strlen($crt_token)) 315 { 316 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 317 $crt_token = ""; $crt_token_modifier = ""; 318 } 319 $crt_token_modifier .= $ch; 327 if ( $ch=='*' ) 328 $crt_token_modifier |= QST_WILDCARD_BEGIN; 329 if ( $ch=='-' ) 330 $crt_token_modifier |= QST_NOT; 331 } 320 332 } 321 333 elseif (preg_match('/[\s,.;!\?]+/', $ch)) … … 324 336 { 325 337 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 326 $crt_token = ""; $crt_token_modifier = ""; 327 } 338 $crt_token = ""; 339 } 340 $crt_token_modifier = 0; 328 341 } 329 342 else … … 331 344 $crt_token .= $ch; 332 345 } 346 } 347 else // qualified with quotes 348 { 349 if ($ch=='"') 350 { 351 if ($i+1 < strlen($q) && $q[$i+1]=='*') 352 { 353 $crt_token_modifier |= QST_WILDCARD_END; 354 $i++; 355 } 356 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 357 $crt_token = ""; $crt_token_modifier = 0; 358 $state=0; 333 359 break; 334 case 1: // qualified with quotes 335 switch ($ch) 336 { 337 case '"': 338 $tokens[] = $crt_token; $token_modifiers[] = $crt_token_modifier; 339 $crt_token = ""; $crt_token_modifier = ""; 340 $state=0; 341 break; 342 default: 343 $crt_token .= $ch; 344 } 345 break; 360 } 361 else 362 $crt_token .= $ch; 346 363 } 347 364 } … … 383 400 { 384 401 $token = trim($tokens[$i], '%'); 385 if ( strstr($token_modifiers[$i], '-')!==false)402 if ($token_modifiers[$i]&QST_NOT) 386 403 continue; 387 404 if ( strlen($token)==0 ) … … 422 439 ); 423 440 $q = trim($q); 424 if (empty($q)) 441 analyse_qsearch($q, $tokens, $token_modifiers); 442 if (count($tokens)==0) 425 443 { 426 444 return $search_results; 427 445 } 446 $debug[] = '<!--'.count($tokens).' tokens'; 428 447 429 analyse_qsearch($q, $tokens, $token_modifiers);430 431 448 $q_like_field = '@@__db_field__@@'; //something never in a search 432 449 $q_like_clause = get_qsearch_like_clause($tokens, $token_modifiers, $q_like_field ); … … 468 485 } 469 486 } 487 $debug[] = count($by_weights).' fulltext'; 488 $debug[] = 'ft score min:'.min($by_weights).' max:'.max($by_weights); 470 489 471 490 … … 494 513 for ($i=0; $i<count($tokens); $i++) 495 514 { 496 if ( strstr($token_modifiers[$i], '-')!==false)515 if ($token_modifiers[$i]&QST_NOT) 497 516 continue;// ignore this NOT token 498 517 $transliterated_token = $transliterated_tokens[$i]; … … 502 521 while ( ($pos = strpos($transliterated_tag, $transliterated_token, $pos)) !== false) 503 522 { 504 if ( strstr($token_modifiers[$i], '*')!==false)523 if ( ($token_modifiers[$i]&QST_WILDCARD)==QST_WILDCARD ) 505 524 {// wildcard in this token 506 525 $match = 1; … … 509 528 $token_len = strlen($transliterated_token); 510 529 511 $word_begin = $pos; 512 while ($word_begin>0) 530 // search begin of word 531 $wbegin_len=0; $wbegin_char=' '; 532 while ($pos-$wbegin_len > 0) 513 533 { 514 if (! is_word_char($transliterated_tag[$word_begin-1]) ) 534 if (! is_word_char($transliterated_tag[$pos-$wbegin_len-1]) ) 535 { 536 $wbegin_char = $transliterated_tag[$pos-$wbegin_len-1]; 515 537 break; 516 $word_begin--; 517 } 518 519 $word_end = $pos + $token_len; 520 while ($word_end<strlen($transliterated_tag) && is_word_char($transliterated_tag[$word_end]) ) 521 $word_end++; 522 523 $this_score = $token_len / ($word_end-$word_begin); 524 if ($token_len <= 2) 525 {// search for 1 or 2 characters must match exactly to avoid retrieving too much data 526 if ($token_len != $word_end-$word_begin) 527 $this_score = 0; 528 } 529 elseif ($token_len == 3) 538 } 539 $wbegin_len++; 540 } 541 542 // search end of word 543 $wend_len=0; $wend_char=' '; 544 while ($pos+$token_len+$wend_len < strlen($transliterated_tag)) 530 545 { 531 if ($word_end-$word_begin > 4) 532 $this_score = 0; 546 if (! is_word_char($transliterated_tag[$pos+$token_len+$wend_len]) ) 547 { 548 $wend_char = $transliterated_tag[$pos+$token_len+$wend_len]; 549 break; 550 } 551 $wend_len++; 552 } 553 554 $this_score = 0; 555 if ( ($token_modifiers[$i]&QST_WILDCARD)==0 ) 556 {// no wildcard begin or end 557 if ($token_len <= 2) 558 {// search for 1 or 2 characters must match exactly to avoid retrieving too much data 559 if ($wbegin_len==0 && $wend_len==0 && !is_odd_wbreak_begin($wbegin_char) && !is_odd_wbreak_end($wend_char) ) 560 $this_score = 1; 561 } 562 elseif ($token_len == 3) 563 { 564 if ($wbegin_len==0) 565 $this_score = $token_len / ($token_len + $wend_len); 566 } 567 else 568 { 569 $this_score = $token_len / ($token_len + 1.1 * $wbegin_len + 0.9 * $wend_len); 570 } 533 571 } 534 572 … … 547 585 } 548 586 $search_results['qs']['matching_tags']=$all_tags; 587 $debug[] = count($all_tags).' tags'; 549 588 550 589 // Step 2.2 - reduce matching tags for every token in the query search … … 573 612 foreach($token_tags[$i] as $arr) 574 613 $tag_ids[] = $arr['tag_id']; 614 $tag_ids = array_unique($tag_ids); 615 $debug[] = count($tag_ids).' unique tags'; 575 616 576 617 if (!empty($tag_ids)) 577 618 { 619 $tag_photo_count=0; 578 620 $query = ' 579 621 SELECT image_id … … 586 628 $image_id=(int)$row['image_id']; 587 629 @$by_weights[$image_id] += 1; 588 } 630 $tag_photo_count++; 631 } 632 $debug[] = $tag_photo_count.' photos for tags'; 633 $debug[] = count($by_weights).' photos after tags'; 589 634 } 590 635 } … … 612 657 } 613 658 } 659 $debug[] = count(@$search_results['qs']['matching_cats']).' albums with images'; 614 660 615 661 if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) ) … … 655 701 $allowed_images = array_from_query( $query, 'id'); 656 702 703 $debug[] = count($allowed_images).' final photo count -->'; 704 global $template; 705 $template->append('footer_elements', implode(', ', $debug) ); 706 657 707 if ( $super_order_by or empty($by_weights) ) 658 708 {
Note: See TracChangeset
for help on using the changeset viewer.