Ignore:
Timestamp:
Mar 22, 2014, 2:03:45 PM (10 years ago)
Author:
rvelices
Message:

bug 3056: quick search OR operator priority taken into account
search for 'mary qwerty' will ignore 'qwerty' and return only results for 'mary' if there is no such thing as 'qwerty' in the photos (if there was 'mary' and 'qwerty', the results for both 'mary' AND 'qwerty' would be shown)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/functions_search.inc.php

    r27868 r27882  
    306306 */
    307307
     308/** Represents a single word or quoted phrase to be searched.*/
    308309class QSingleToken
    309310{
    310311  var $is_single = true;
    311   var $token;
     312  var $token; /* the actual word/phrase string*/
    312313  var $idx;
    313314
     
    318319}
    319320
     321/** Represents an expression of several words or sub expressions to be searched.*/
    320322class QMultiToken
    321323{
    322324  var $is_single = false;
    323   var $tokens = array();
    324   var $token_modifiers = array();
     325  var $tokens = array(); // the actual array of QSingleToken or QMultiToken
     326  var $token_modifiers = array(); // modifiers (OR,NOT,...) for every token
    325327
    326328  function __toString()
     
    359361  }
    360362
    361   function push(&$token, &$modifier)
     363  private function push(&$token, &$modifier)
    362364  {
    363365    $this->tokens[] = new QSingleToken($token);
     
    367369  }
    368370
     371  /**
     372  * Parses the input query string by tokenizing the input, generating the modifiers (and/or/not/quotation/wildcards...).
     373  * Recursivity occurs when parsing ()
     374  * @param string $q the actual query to be parsed
     375  * @param int $qi the character index in $q where to start parsing
     376  * @param int $level the depth from root in the tree (number of opened and unclosed opening brackets)
     377  */
    369378  protected function parse_expression($q, &$qi, $level)
    370379  {
     
    487496    }
    488497  }
     498
     499  private static function priority($modifier)
     500  {
     501    return $modifier & QST_OR ? 0 :1;
     502  }
     503
     504  /* because evaluations occur left to right, we ensure that 'a OR b c d' is interpreted as 'a OR (b c d)'*/
     505  protected function check_operator_priority()
     506  {
     507    for ($i=0; $i<count($this->tokens); $i++)
     508    {
     509      if (!$this->tokens[$i]->is_single)
     510        $this->tokens[$i]->check_operator_priority();
     511      if ($i==1)
     512        $crt_prio = self::priority($this->token_modifiers[$i]);
     513      if ($i<=1)
     514        continue;
     515      $prio = self::priority($this->token_modifiers[$i]);
     516      if ($prio > $crt_prio)
     517      {// e.g. 'a OR b c d' i=2, operator(c)=AND -> prio(AND) > prio(OR) = operator(b)
     518        $term_count = 2; // at least b and c to be regrouped
     519        for ($j=$i+1; $j<count($this->tokens); $j++)
     520        {
     521          if (self::priority($this->token_modifiers[$j]) >= $prio)
     522            $term_count++; // also take d
     523          else
     524            break;
     525        }
     526
     527        $i--; // move pointer to b
     528        // crate sub expression (b c d)
     529        $sub = new QMultiToken;
     530        $sub->tokens = array_splice($this->tokens, $i, $term_count);
     531        $sub->token_modifiers = array_splice($this->token_modifiers, $i, $term_count);
     532
     533        // rewrite ourseleves as a (b c d)
     534        array_splice($this->tokens, $i, 0, array($sub));
     535        array_splice($this->token_modifiers, $i, 0, array($sub->token_modifiers[0]&QST_OR));
     536        $sub->token_modifiers[0] &= ~QST_OR;
     537
     538        $sub->check_operator_priority();
     539      }
     540      else
     541        $crt_prio = $prio;
     542    }
     543  }
    489544}
    490545
     
    498553    $i = 0;
    499554    $this->parse_expression($q, $i, 0);
    500     //@TODO: manipulate the tree so that 'a OR b c' is the same as 'b c OR a'
    501     $this->build_single_tokens($this);
    502   }
    503 
    504   private function build_single_tokens(QMultiToken $expr)
    505   {
    506     //@TODO: double negation results in no negation in token modifier
     555    //manipulate the tree so that 'a OR b c' is the same as 'b c OR a'
     556    $this->check_operator_priority();
     557    $this->build_single_tokens($this, 0);
     558  }
     559
     560  private function build_single_tokens(QMultiToken $expr, $this_is_not)
     561  {
    507562    for ($i=0; $i<count($expr->tokens); $i++)
    508563    {
    509564      $token = $expr->tokens[$i];
     565      $crt_is_not = ($expr->token_modifiers[$i] ^ $this_is_not) & QST_NOT; // no negation OR double negation -> no negation;
     566
    510567      if ($token->is_single)
    511568      {
    512569        $token->idx = count($this->stokens);
    513570        $this->stokens[] = $token->token;
    514         $this->stoken_modifiers[] = $expr->token_modifiers[$i];
     571
     572        $modifier = $expr->token_modifiers[$i];
     573        if ($crt_is_not)
     574          $modifier |= QST_NOT;
     575        else
     576          $modifier &= ~QST_NOT;
     577        $this->stoken_modifiers[] = $modifier;
    515578      }
    516579      else
    517         $this->build_single_tokens($token);
    518     }
    519   }
    520 }
    521 
     580        $this->build_single_tokens($token, $crt_is_not);
     581    }
     582  }
     583}
     584
     585/**
     586  Structure of results being filled from different tables
     587*/
    522588class QResults
    523589{
     
    730796
    731797
    732 function qsearch_eval(QExpression $expr, QResults $qsr, QMultiToken $crt_expr)
    733 {
     798function qsearch_eval(QMultiToken $expr, QResults $qsr, &$qualifies, &$ignored_terms)
     799{
     800  $qualifies = false; // until we find at least one positive term
     801  $ignored_terms = array();
     802
    734803  $ids = $not_ids = array();
    735   $first = true;
    736   for ($i=0; $i<count($crt_expr->tokens); $i++)
    737   {
    738     $current = $crt_expr->tokens[$i];
    739     if ($current->is_single)
    740     {
    741       $crt_ids = $qsr->iids[$current->idx] = array_unique( array_merge($qsr->images_iids[$current->idx], $qsr->tag_iids[$current->idx]) );
     804
     805  for ($i=0; $i<count($expr->tokens); $i++)
     806  {
     807    $crt = $expr->tokens[$i];
     808    if ($crt->is_single)
     809    {
     810      $crt_ids = $qsr->iids[$crt->idx] = array_unique( array_merge($qsr->images_iids[$crt->idx], $qsr->tag_iids[$crt->idx]) );
     811      $crt_qualifies = count($crt_ids)>0 || count($qsr->tag_ids[$crt->idx])>0;
     812      $crt_ignored_terms = $crt_qualifies ? array() : array($crt->token);
    742813    }
    743814    else
    744       $crt_ids = qsearch_eval($expr, $qsr, $current);
    745     $modifier = $crt_expr->token_modifiers[$i];
    746 
     815      $crt_ids = qsearch_eval($crt, $qsr, $crt_qualifies, $crt_ignored_terms);
     816
     817    $modifier = $expr->token_modifiers[$i];
    747818    if ($modifier & QST_NOT)
    748819      $not_ids = array_unique( array_merge($not_ids, $crt_ids));
    749820    else
    750821    {
     822      $ignored_terms = array_merge($ignored_terms, $crt_ignored_terms);
    751823      if ($modifier & QST_OR)
     824      {
    752825        $ids = array_unique( array_merge($ids, $crt_ids) );
    753       else
    754       {
    755         if ($current->is_single && empty($crt_ids))
    756         {
    757           //@TODO: mark this term as unmatched and tell users
    758           //@TODO: if we don't find a term at all, maybe ignore it and produce some results
    759         }
    760         if ($first)
     826        $qualifies |= $crt_qualifies;
     827      }
     828      elseif ($crt_qualifies)
     829      {
     830        if ($qualifies)
     831          $ids = array_intersect($ids, $crt_ids);
     832        else
    761833          $ids = $crt_ids;
    762         else
    763           $ids = array_intersect($ids, $crt_ids);
    764         $first= false;
     834        $qualifies = true;
    765835      }
    766836    }
     
    779849 *    'items' => array of matching images
    780850 *    'qs'    => array(
     851 *      'unmatched_terms' => array of terms from the input string that were not matched
    781852 *      'matching_tags' => array of matching tags
    782853 *      'matching_cats' => array of matching categories
     
    792863function get_quick_search_results($q, $super_order_by, $images_where='')
    793864{
    794   global $user, $conf;
    795 
     865  global $conf;
     866  //@TODO: maybe cache for 10 minutes the result set to avoid many expensive sql calls when navigating the pictures
    796867  $search_results =
    797868    array(
     
    808879//var_export($qsr->all_tags);
    809880
    810   $ids = qsearch_eval($expression, $qsr, $expression);
     881  $ids = qsearch_eval($expression, $qsr, $tmp, $search_results['qs']['unmatched_terms']);
    811882
    812883  $debug[] = "<!--\nparsed: ".$expression;
Note: See TracChangeset for help on using the changeset viewer.