Changeset 28065 for trunk/include/functions_search.inc.php
- Timestamp:
- Apr 3, 2014, 10:52:38 PM (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/functions_search.inc.php
// Changeset view columns: r28064 (old) / r28065 (new)
define('QST_WILDCARD', QST_WILDCARD_BEGIN|QST_WILDCARD_END);


/**
 * Describes a search scope: a keyword that may prefix a term as "scope:term"
 * (the parser's own example is 'tag:(John OR Bill)').
 */
class QSearchScope
{
  /** Canonical identifier of the scope (e.g. 'tag', 'width'). */
  public $id;
  /** Array of alternative names accepted for this scope. */
  public $aliases;
  /** True for textual scopes, false for scopes whose term needs parse() (numeric ranges). */
  public $is_text;
  /** True when a scoped token with an empty term must be kept instead of dropped. */
  public $allow_empty;

  /**
   * @param string $id          canonical scope identifier
   * @param array  $aliases     alternative names for the scope
   * @param bool   $allow_empty keep tokens whose term is empty
   * @param bool   $is_text     false for non textual (e.g. numeric) scopes
   */
  public function __construct($id, $aliases, $allow_empty=false, $is_text=true)
  {
    $this->id = $id;
    $this->aliases = $aliases;
    $this->is_text = $is_text;
    $this->allow_empty = $allow_empty;
  }
}

/**
 * Non textual scope whose term is a number or a numeric range, written
 * "N" or "N..M", each bound optionally followed by a k (x1000) or m (x1000000) suffix.
 */
class QNumericRangeScope extends QSearchScope
{
  /**
   * @param string $id          canonical scope identifier
   * @param array  $aliases     alternative names for the scope
   * @param bool   $allow_empty accept a term without any usable bound
   */
  public function __construct($id, $aliases, $allow_empty=false)
  {
    parent::__construct($id, $aliases, $allow_empty, false);
  }

  /**
   * Converts $token->term into array(lower, upper) and stores it in
   * $token->scope_data. A bound that does not start with digits/dots becomes ''.
   * A single value "N" yields the range N..N. With a suffix the upper bound is
   * widened to the end of the unit (e.g. "2k" gives 2000..2999).
   *
   * @param object $token token whose ->term is read and ->scope_data is written
   * @return bool false when both bounds are empty and the scope does not allow
   *   empty values (scope_data is then left untouched), true otherwise
   */
  public function parse($token)
  {
    $str = $token->term;
    $pos = strpos($str, '..');
    if ($pos !== false)
      $range = array( substr($str, 0, $pos), substr($str, $pos+2) );
    else
      $range = array($str, $str);

    foreach ($range as $i => &$val)
    {
      if (preg_match('/^([0-9.]+)([km])?/i', $val, $matches))
      {
        $val = floatval($matches[1]);
        if (isset($matches[2]))
        { // the pattern only lets k/K or m/M through
          $multiplier = ('k' == strtolower($matches[2])) ? 1000 : 1000000;
          $val *= $multiplier;
          if ($i) // upper bound: cover the whole unit
            $val += $multiplier - 1;
        }
      }
      else
        $val = '';
    }
    unset($val); // drop the reference left behind by the foreach

    // Strict comparison on purpose: with == a numeric bound of 0 equals '' on PHP < 8,
    // which made terms such as "0" or "0..0" look empty and get rejected.
    if (!$this->allow_empty && $range[0]==='' && $range[1]==='')
      return false;
    $token->scope_data = $range;
    return true;
  }

  /**
   * Builds the SQL condition matching the range stored by parse().
   *
   * @param string $field SQL expression to compare (column name or expression)
   * @param object $token token whose ->scope_data holds array(lower, upper)
   * @return string "$field IS NULL" when both bounds are '', otherwise the
   *   parenthesised bound comparisons joined with AND
   */
  public function get_sql($field, $token)
  {
    $clauses = array();
    if ($token->scope_data[0]!=='')
      $clauses[] = $field.' >= ' .$token->scope_data[0].' ';
    if ($token->scope_data[1]!=='')
      $clauses[] = $field.' <= ' .$token->scope_data[1].' ';

    if (empty($clauses))
      return $field.' IS NULL';
    return '('.implode(' AND ', $clauses).')';
  }
}

/**
 * Analyzes and splits the quick/query search query $q into tokens.
 */
… … 310 384 { 311 385 var $is_single = true; 386 var $modifier; 312 387 var $term; /* the actual word/phrase string*/ 388 var $scope; 389 390 var $scope_data; 313 391 var $idx; 314 392 315 function __construct($term )393 function __construct($term, $modifier, $scope) 316 394 { 317 395 $this->term = $term; 318 } 319 396 $this->modifier = $modifier; 397 $this->scope = $scope; 398 } 399 320 400 function __toString() 321 401 { 322 return $this->term; 402 $s = ''; 403 if (isset($this->scope)) 404 $s .= $this->scope->id .':'; 405 if ($this->modifier & QST_WILDCARD_BEGIN) 406 $s .= '*'; 407 if ($this->modifier & QST_QUOTED) 408 $s .= '"'; 409 $s .= $this->term; 410 if ($this->modifier & QST_QUOTED) 411 $s .= '"'; 412 if ($this->modifier & QST_WILDCARD_END) 413 $s .= '*'; 414 return $s; 323 415 } 324 416 } … … 328 420 { 329 421 var $is_single = false; 422 var $modifier; 330 423 var $tokens = array(); // the actual array of QSingleToken or QMultiToken 331 var $token_modifiers = array(); // modifiers (OR,NOT,...) for every token332 424 333 425 function __toString() … … 336 428 for ($i=0; $i<count($this->tokens); $i++) 337 429 { 338 $modifier = $this->token _modifiers[$i];430 $modifier = $this->tokens[$i]->modifier; 339 431 if ($i) 340 432 $s .= ' '; … … 343 435 if ($modifier & QST_NOT) 344 436 $s .= 'NOT '; 345 if ($modifier & QST_WILDCARD_BEGIN)346 $s .= '*';347 if ($modifier & QST_QUOTED)348 $s .= '"';349 437 if (! 
($this->tokens[$i]->is_single) ) 350 438 { … … 357 445 $s .= $this->tokens[$i]; 358 446 } 359 if ($modifier & QST_QUOTED)360 $s .= '"';361 if ($modifier & QST_WILDCARD_END)362 $s .= '*';363 364 447 } 365 448 return $s; 366 449 } 367 450 368 private function push(&$token, &$modifier) 369 { 370 $this->tokens[] = new QSingleToken($token); 371 $this->token_modifiers[] = $modifier; 451 private function push(&$token, &$modifier, &$scope) 452 { 453 if (strlen($token) || (isset($scope) && $scope->allow_empty)) 454 { 455 $this->tokens[] = new QSingleToken($token, $modifier, $scope); 456 } 372 457 $token = ""; 373 458 $modifier = 0; 459 $scope = null; 374 460 } 375 461 … … 381 467 * @param int $level the depth from root in the tree (number of opened and unclosed opening brackets) 382 468 */ 383 protected function parse_expression($q, &$qi, $level )469 protected function parse_expression($q, &$qi, $level, $root) 384 470 { 385 471 $crt_token = ""; 386 472 $crt_modifier = 0; 473 $crt_scope = null; 387 474 388 475 for ($stop=false; !$stop && $qi<strlen($q); $qi++) … … 395 482 case '(': 396 483 if (strlen($crt_token)) 397 $this->push($crt_token, $crt_modifier );484 $this->push($crt_token, $crt_modifier, $crt_scope); 398 485 $sub = new QMultiToken; 399 486 $qi++; 400 $sub->parse_expression($q, $qi, $level+1); 487 $sub->parse_expression($q, $qi, $level+1, $root); 488 $sub->modifier = $crt_modifier; 489 if (isset($crt_scope) && $crt_scope->is_text) 490 { 491 $sub->apply_scope($crt_scope); // eg. 
'tag:(John OR Bill)' 492 } 401 493 $this->tokens[] = $sub; 402 $this->token_modifiers[] = $crt_modifier;403 494 $crt_modifier = 0; 495 $crt_scope = null; 404 496 break; 405 497 case ')': … … 407 499 $stop = true; 408 500 break; 501 case ':': 502 $scope = @$root->scopes[$crt_token]; 503 if (!isset($scope) || isset($crt_scope)) 504 { // white space 505 $this->push($crt_token, $crt_modifier, $crt_scope); 506 } 507 else 508 { 509 $crt_token = ""; 510 $crt_scope = $scope; 511 } 512 break; 409 513 case '"': 410 514 if (strlen($crt_token)) 411 $this->push($crt_token, $crt_modifier );515 $this->push($crt_token, $crt_modifier, $crt_scope); 412 516 $crt_modifier |= QST_QUOTED; 413 517 break; 414 518 case '-': 415 if (strlen($crt_token) )519 if (strlen($crt_token) || isset($crt_scope)) 416 520 $crt_token .= $ch; 417 521 else … … 424 528 $crt_modifier |= QST_WILDCARD_BEGIN; 425 529 break; 530 case '.': 531 if (isset($crt_scope) && !$crt_scope->is_text) 532 { 533 $crt_token .= $ch; 534 break; 535 } 536 // else white space go on.. 
426 537 default: 427 538 if (preg_match('/[\s,.;!\?]+/', $ch)) 428 539 { // white space 429 540 if (strlen($crt_token)) 430 $this->push($crt_token, $crt_modifier );541 $this->push($crt_token, $crt_modifier, $crt_scope); 431 542 $crt_modifier = 0; 432 543 } … … 445 556 $qi++; 446 557 } 447 $this->push($crt_token, $crt_modifier );558 $this->push($crt_token, $crt_modifier, $crt_scope); 448 559 } 449 560 else … … 452 563 } 453 564 454 if (strlen($crt_token)) 455 $this->push($crt_token, $crt_modifier); 565 $this->push($crt_token, $crt_modifier, $crt_scope); 456 566 457 567 for ($i=0; $i<count($this->tokens); $i++) … … 461 571 if ($token->is_single) 462 572 { 463 if ( ($this->token_modifiers[$i]&QST_QUOTED)==0)464 { 465 if ( 'not' == strtolower($token->term))573 if (!isset($token->scope)) 574 { 575 if ( ($token->modifier & QST_QUOTED)==0 ) 466 576 { 467 if ($i+1 < count($this->tokens)) 468 $this->token_modifiers[$i+1] |= QST_NOT; 469 $token->term = ""; 577 if ('not' == strtolower($token->term)) 578 { 579 if ($i+1 < count($this->tokens)) 580 $this->tokens[$i+1]->modifier |= QST_NOT; 581 $token->term = ""; 582 } 583 if ('or' == strtolower($token->term)) 584 { 585 if ($i+1 < count($this->tokens)) 586 $this->token[$i+1]->modifier |= QST_OR; 587 $token->term = ""; 588 } 589 if ('and' == strtolower($token->term)) 590 { 591 $token->term = ""; 592 } 593 if ( substr($token->term, -1)=='*' ) 594 { 595 $token->term = rtrim($token->term, '*'); 596 $token->modifier |= QST_WILDCARD_END; 597 } 470 598 } 471 if ('or' == strtolower($token->term)) 472 { 473 if ($i+1 < count($this->tokens)) 474 $this->token_modifiers[$i+1] |= QST_OR; 475 $token->term = ""; 476 } 477 if ('and' == strtolower($token->term)) 478 { 479 $token->term = ""; 480 } 481 if ( substr($token->term, -1)=='*' ) 482 { 483 $token->term = rtrim($token->term, '*'); 484 $this->token_modifiers[$i] |= QST_WILDCARD_END; 485 } 486 } 487 if (!strlen($token->term)) 488 $remove = true; 599 if (!strlen($token->term)) 600 $remove = 
true; 601 } 602 elseif (!$token->scope->is_text) 603 { 604 if (!$token->scope->parse($token)) 605 $remove = true; 606 } 489 607 } 490 608 else … … 496 614 { 497 615 array_splice($this->tokens, $i, 1); 498 array_splice($this->token_modifiers, $i, 1);499 616 $i--; 500 617 } … … 515 632 $this->tokens[$i]->check_operator_priority(); 516 633 if ($i==1) 517 $crt_prio = self::priority($this->token _modifiers[$i]);634 $crt_prio = self::priority($this->tokens[$i]->modifier); 518 635 if ($i<=1) 519 636 continue; 520 $prio = self::priority($this->token _modifiers[$i]);637 $prio = self::priority($this->tokens[$i]->modifier); 521 638 if ($prio > $crt_prio) 522 639 {// e.g. 'a OR b c d' i=2, operator(c)=AND -> prio(AND) > prio(OR) = operator(b) … … 524 641 for ($j=$i+1; $j<count($this->tokens); $j++) 525 642 { 526 if (self::priority($this->token _modifiers[$j]) >= $prio)643 if (self::priority($this->tokens[$j]->modifier) >= $prio) 527 644 $term_count++; // also take d 528 645 else … … 534 651 $sub = new QMultiToken; 535 652 $sub->tokens = array_splice($this->tokens, $i, $term_count); 536 $sub->token_modifiers = array_splice($this->token_modifiers, $i, $term_count);537 653 538 654 // rewrite ourseleves as a (b c d) 539 655 array_splice($this->tokens, $i, 0, array($sub)); 540 array_splice($this->token_modifiers, $i, 0, array($sub->token_modifiers[0]&QST_OR));541 $sub->token _modifiers[0]&= ~QST_OR;656 $sub->modifier = $sub->tokens[0]->modifier & QST_OR; 657 $sub->tokens[0]->modifier &= ~QST_OR; 542 658 543 659 $sub->check_operator_priority(); … … 551 667 class QExpression extends QMultiToken 552 668 { 669 var $scopes = array(); 553 670 var $stokens = array(); 554 671 var $stoken_modifiers = array(); 555 672 556 function __construct($q) 557 { 673 function __construct($q, $scopes) 674 { 675 foreach ($scopes as $scope) 676 { 677 $this->scopes[$scope->id] = $scope; 678 foreach ($scope->aliases as $alias) 679 $this->scopes[strtolower($alias)] = $scope; 680 } 558 681 $i = 0; 559 
$this->parse_expression($q, $i, 0 );682 $this->parse_expression($q, $i, 0, $this); 560 683 //manipulate the tree so that 'a OR b c' is the same as 'b c OR a' 561 684 $this->check_operator_priority(); … … 568 691 { 569 692 $token = $expr->tokens[$i]; 570 $crt_is_not = ($ expr->token_modifiers[$i]^ $this_is_not) & QST_NOT; // no negation OR double negation -> no negation;693 $crt_is_not = ($token->modifier ^ $this_is_not) & QST_NOT; // no negation OR double negation -> no negation; 571 694 572 695 if ($token->is_single) … … 575 698 $this->stokens[] = $token; 576 699 577 $modifier = $ expr->token_modifiers[$i];700 $modifier = $token->modifier; 578 701 if ($crt_is_not) 579 702 $modifier |= QST_NOT; … … 604 727 function qsearch_get_images(QExpression $expr, QResults $qsr) 605 728 { 606 //@TODO: inflections for english / french607 729 $qsr->images_iids = array_fill(0, count($expr->tokens), array()); 608 730 … … 619 741 for ($i=0; $i<count($expr->stokens); $i++) 620 742 { 621 $token = $expr->stokens[$i]->term; 743 $token = $expr->stokens[$i]; 744 $term = $token->term; 745 $scope_id = isset($token->scope) ? 
$token->scope->id : 'photo'; 622 746 $clauses = array(); 623 747 624 $like = addslashes($t oken);748 $like = addslashes($term); 625 749 $like = str_replace( array('%','_'), array('\\%','\\_'), $like); // escape LIKE specials %_ 626 $clauses[] = 'CONVERT(file, CHAR) LIKE \'%'.$like.'%\''; 627 628 if ($inflector!=null && strlen($token)>2 629 && ($expr->stoken_modifiers[$i] & (QST_QUOTED|QST_WILDCARD))==0 630 && strcspn($token, '\'0123456789') == strlen($token) 631 ) 632 { 633 $variants = array_unique( array_diff( $inflector->get_variants($token), array($token) ) ); 634 $qsr->variants[$token] = $variants; 635 } 636 else 637 { 638 $variants = array(); 639 } 640 641 if (strlen($token)>3) // default minimum full text index 642 { 643 $ft = $token; 644 if ($expr->stoken_modifiers[$i] & QST_QUOTED) 645 $ft = '"'.$ft.'"'; 646 if ($expr->stoken_modifiers[$i] & QST_WILDCARD_END) 647 $ft .= '*'; 648 foreach ($variants as $variant) 649 { 650 $ft.=' '.$variant; 651 } 652 $clauses[] = 'MATCH(i.name, i.comment) AGAINST( \''.addslashes($ft).'\' IN BOOLEAN MODE)'; 653 } 654 else 655 { 656 foreach( array('i.name', 'i.comment') as $field) 657 { 658 /*$clauses[] = $field.' LIKE \''.$like.' %\''; 659 $clauses[] = $field.' LIKE \'% '.$like.'\''; 660 $clauses[] = $field.' LIKE \'% '.$like.' %\'';*/ 661 $clauses[] = $field.' 
REGEXP \'[[:<:]]'.addslashes(preg_quote($token)).'[[:>:]]\''; 662 } 663 } 664 $query = $query_base.'('.implode(' OR ', $clauses).')'; 665 $qsr->images_iids[$i] = query2array($query,null,'id'); 750 $file_like = 'CONVERT(file, CHAR) LIKE \'%'.$like.'%\''; 751 752 switch ($scope_id) 753 { 754 case 'photo': 755 $clauses[] = $file_like; 756 757 if ($inflector!=null && strlen($term)>2 758 && ($expr->stoken_modifiers[$i] & (QST_QUOTED|QST_WILDCARD))==0 759 && strcspn($term, '\'0123456789') == strlen($term) 760 ) 761 { 762 $variants = array_unique( array_diff( $inflector->get_variants($term), array($term) ) ); 763 $qsr->variants[$term] = $variants; 764 } 765 else 766 { 767 $variants = array(); 768 } 769 770 if (strlen($term)>3) // default minimum full text index 771 { 772 $ft = $term; 773 if ($expr->stoken_modifiers[$i] & QST_QUOTED) 774 $ft = '"'.$ft.'"'; 775 if ($expr->stoken_modifiers[$i] & QST_WILDCARD_END) 776 $ft .= '*'; 777 foreach ($variants as $variant) 778 { 779 $ft.=' '.$variant; 780 } 781 $clauses[] = 'MATCH(i.name, i.comment) AGAINST( \''.addslashes($ft).'\' IN BOOLEAN MODE)'; 782 } 783 else 784 { 785 foreach( array('i.name', 'i.comment') as $field) 786 { 787 $clauses[] = $field.' 
REGEXP \'[[:<:]]'.addslashes(preg_quote($term)).'[[:>:]]\''; 788 } 789 } 790 break; 791 792 case 'file': 793 $clauses[] = $file_like; 794 break; 795 case 'width': 796 case 'height': 797 case 'hits': 798 case 'rating_score': 799 $clauses[] = $token->scope->get_sql($scope_id, $token); 800 break; 801 case 'ratio': 802 $clauses[] = $token->scope->get_sql('width/height', $token); 803 break; 804 case 'size': 805 $clauses[] = $token->scope->get_sql('width*height', $token); 806 break; 807 case 'filesize': 808 $clauses[] = $token->scope->get_sql('filesize', $token); 809 break; 810 811 } 812 if (!empty($clauses)) 813 { 814 $query = $query_base.'('.implode(' OR ', $clauses).')'; 815 $qsr->images_iids[$i] = query2array($query,null,'id'); 816 } 666 817 } 667 818 } … … 679 830 foreach ($tokens as $token) 680 831 { 681 $transliterated_tokens[] = transliterate($token->term); 832 if (!isset($token->scope) || 'tag' == $token->scope) 833 { 834 $transliterated_tokens[] = transliterate($token->term); 835 } 836 else 837 { 838 $transliterated_tokens[] = ''; 839 } 682 840 } 683 841 … … 696 854 { 697 855 $transliterated_token = $transliterated_tokens[$i]; 856 if (strlen($transliterated_token)==0) 857 continue; 698 858 699 859 $match = false; … … 831 991 $qsr->tag_iids[$i] = query2array($query, null, 'image_id'); 832 992 } 993 elseif (isset($tokens[$i]->scope) && 'tag' == $tokens[$i]->scope->id && strlen($token->term)==0) 994 { 995 if ($tokens[$i]->modifier & QST_WILDCARD) 996 {// eg. 'tag:*' returns all tagged images 997 $qsr->tag_iids[$i] = query2array('SELECT DISTINCT image_id FROM '.IMAGE_TAG_TABLE, null, 'image_id'); 998 } 999 else 1000 {// eg. 'tag:' returns all untagged images 1001 $qsr->tag_iids[$i] = query2array('SELECT id FROM '.IMAGES_TABLE.' LEFT JOIN '.IMAGE_TAG_TABLE.' 
ON id=image_id WHERE image_id IS NULL', null, 'id'); 1002 } 1003 } 833 1004 } 834 1005 } … … 854 1025 $crt_ids = qsearch_eval($crt, $qsr, $crt_qualifies, $crt_ignored_terms); 855 1026 856 $modifier = $ expr->token_modifiers[$i];1027 $modifier = $crt->modifier; 857 1028 if ($modifier & QST_NOT) 858 1029 $not_ids = array_unique( array_merge($not_ids, $crt_ids)); … … 911 1082 ); 912 1083 913 $expression = new QExpression($q); 1084 $scopes = array(); 1085 $scopes[] = new QSearchScope('tag', array('tags')); 1086 $scopes[] = new QSearchScope('photo', array('photos')); 1087 $scopes[] = new QSearchScope('file', array('filename')); 1088 $scopes[] = new QNumericRangeScope('width', array()); 1089 $scopes[] = new QNumericRangeScope('height', array()); 1090 $scopes[] = new QNumericRangeScope('ratio', array()); 1091 $scopes[] = new QNumericRangeScope('size', array()); 1092 $scopes[] = new QNumericRangeScope('filesize', array()); 1093 $scopes[] = new QNumericRangeScope('hits', array('hit', 'visit', 'visits')); 1094 $scopes[] = new QNumericRangeScope('rating_score', array('score'), true); 1095 $expression = new QExpression($q, $scopes); 914 1096 //var_export($expression); 915 1097
Note: See TracChangeset
for help on using the changeset viewer.