Ignore:
Timestamp:
Jul 23, 2008, 2:56:53 AM (16 years ago)
Author:
rvelices
Message:
  • merge r2451 from trunk: normalize behaviour of query search versus std search (now both return items already sorted and permission checked); also more optimized sql queries (in some cases)
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/branch-1_7/include/functions_search.inc.php

    r2044 r2452  
    198198 * @return array
    199199 */
    200 function get_regular_search_results($search)
     200function get_regular_search_results($search, $images_where)
    201201{
     202  global $conf;
     203  $forbidden = get_sql_condition_FandF(
     204        array
     205          (
     206            'forbidden_categories' => 'category_id',
     207            'visible_categories' => 'category_id',
     208            'visible_images' => 'id'
     209          ),
     210        "\n  AND"
     211    );
     212
    202213  $items = array();
    203 
    204   $search_clause = get_sql_search_clause($search);
    205 
    206   if (!empty($search_clause))
    207   {
    208     $query = '
    209 SELECT DISTINCT(id)
    210   FROM '.IMAGES_TABLE.'
    211     INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
    212   WHERE '.$search_clause.'
    213 ;';
    214     $items = array_from_query($query, 'id');
    215   }
     214  $tag_items = array();
    216215
    217216  if (isset($search['fields']['tags']))
     
    221220      $search['fields']['tags']['mode']
    222221      );
    223 
     222  }
     223
     224  $search_clause = get_sql_search_clause($search);
     225
     226  if (!empty($search_clause))
     227  {
     228    $query = '
     229SELECT DISTINCT(id)
     230  FROM '.IMAGES_TABLE.' i
     231    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
     232  WHERE '.$search_clause;
     233    if (!empty($images_where))
     234    {
     235      $query .= "\n  AND ".$images_where;
     236    }
     237    if (empty($tag_items) or $search['mode']=='AND')
     238    { // directly use forbidden and order by
     239      $query .= $forbidden.'
     240  '.$conf['order_by'];
     241    }
     242    $items = array_from_query($query, 'id');
     243  }
     244
     245  if ( !empty($tag_items) )
     246  {
     247    $need_permission_check = false;
    224248    switch ($search['mode'])
    225249    {
    226250      case 'AND':
    227       {
    228251        if (empty($search_clause))
    229252        {
     253          $need_permission_check = true;
    230254          $items = $tag_items;
    231255        }
     
    235259        }
    236260        break;
    237       }
    238261      case 'OR':
    239       {
     262        $before_count = count($items);
    240263        $items = array_unique(
    241264          array_merge(
     
    244267            )
    245268          );
     269        if ( $before_count < count($items) )
     270        {
     271          $need_permission_check = true;
     272        }
    246273        break;
     274    }
     275    if ($need_permission_check and count($items) )
     276    {
     277      $query = '
     278SELECT DISTINCT(id)
     279  FROM '.IMAGES_TABLE.' i
     280    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
     281  WHERE id IN ('.implode(',', $items).') '.$forbidden;
     282      if (!empty($images_where))
     283      {
     284        $query .= "\n  AND ".$images_where;
    247285      }
     286      $query .= '
     287  '.$conf['order_by'];
     288      $items = array_from_query($query, 'id');
    248289    }
    249290  }
     
    254295/**
    255296 * returns the LIKE sql clause corresponding to the quick search query $q
    256  * and the field $field. example q="john bill", field="file" will return
    257  * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql
    258  * full text search (+,<,>) are omitted.
     297 * and the field $field. example q='john bill', field='file' will return
     298 * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full
     299 * text search (+,<,>,~) are omitted. The query can contain a phrase:
     300 * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'.
    259301 * @param string q
    260302 * @param string field
     
    263305function get_qsearch_like_clause($q, $field)
    264306{
    265   $tokens = preg_split('/[\s,.;!\?]+/', $q);
     307  $q = stripslashes($q);
     308  $tokens = array();
     309  $token_modifiers = array();
     310  $crt_token = "";
     311  $crt_token_modifier = "";
     312  $state = 0;
     313
     314  for ($i=0; $i<strlen($q); $i++)
     315  {
     316    $ch = $q[$i];
     317    switch ($state)
     318    {
     319      case 0:
     320        if ($ch=='"')
     321        {
     322          if (strlen($crt_token))
     323          {
     324            $tokens[] = $crt_token;
     325            $token_modifiers[] = $crt_token_modifier;
     326            $crt_token = "";
     327            $crt_token_modifier = "";
     328          }
     329          $state=1;
     330        }
     331        elseif ( $ch=='*' )
     332        { // wild card
     333          $crt_token .= '%';
     334        }
     335        elseif ( strcspn($ch, '+-><~')==0 )
     336        { //special full text modifier
     337          if (strlen($crt_token))
     338          {
     339            $tokens[] = $crt_token;
     340            $token_modifiers[] = $crt_token_modifier;
     341            $crt_token = "";
     342            $crt_token_modifier = "";
     343          }
     344          $crt_token_modifier .= $ch;
     345        }
     346        elseif (preg_match('/[\s,.;!\?]+/', $ch))
     347        { // white space
     348          if (strlen($crt_token))
     349          {
     350            $tokens[] = $crt_token;
     351            $token_modifiers[] = $crt_token_modifier;
     352            $crt_token = "";
     353            $crt_token_modifier = "";
     354          }
     355        }
     356        else
     357        {
     358          $crt_token .= $ch;
     359        }
     360        break;
     361      case 1: // qualified with quotes
     362        switch ($ch)
     363        {
     364          case '"':
     365            $tokens[] = $crt_token;
     366            $token_modifiers[] = $crt_token_modifier;
     367            $crt_token = "";
     368            $crt_token_modifier = "";
     369            $state=0;
     370            break;
     371          default:
     372            $crt_token .= $ch;
     373        }
     374        break;
     375    }
     376  }
     377  if (strlen($crt_token))
     378  {
     379    $tokens[] = $crt_token;
     380    $token_modifiers[] = $crt_token_modifier;
     381  }
     382
     383  $clauses = array();
    266384  for ($i=0; $i<count($tokens); $i++)
    267385  {
    268     $tokens[$i]=str_replace('*','%', $tokens[$i]);
    269     if (preg_match('/^[+<>]/',$tokens[$i]) )
    270       $tokens[$i]=substr($tokens[$i], 1);
    271     else if (substr($tokens[$i], 0, 1)=='-')
    272     {
    273       unset($tokens[$i]);
    274       $i--;
    275     }
    276   }
    277 
    278   if (!empty($tokens))
    279   {
    280     $query = '(';
    281     for ($i=0; $i<count($tokens); $i++)
    282     {
    283       if ($i>0) $query .= 'OR ';
    284       $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" ';
    285     }
    286     $query .= ')';
    287     return $query;
    288   }
    289   return null;
     386    $tokens[$i] = trim($tokens[$i], '%');
     387    if (strstr($token_modifiers[$i], '-')!==false)
     388      continue;
     389    if ( strlen($tokens[$i])==0)
     390      continue;
     391    $clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"';
     392  }
     393
     394  return count($clauses) ? '('.implode(' OR ', $clauses).')' : null;
    290395}
    291396
    292397
    293398/**
    294  * returns the search results (array of image ids) corresponding to a
    295  * quick/query search. A quick/query search returns many items (search is
    296  * not strict), but results are sorted by relevance.
     399 * returns the search results corresponding to a quick/query search.
     400 * A quick/query search returns many items (search is not strict), but results
     401 * are sorted by relevance unless $super_order_by is true. Returns:
     402 * array (
     403 * 'items' => array(85,68,79...)
     404 * 'qs'    => array(
     405 *    'matching_tags' => array of matching tags
     406 *    'matching_cats' => array of matching categories
     407 *    'matching_cats_no_images' =>array(99) - matching categories without images
     408 *      ))
    297409 *
    298410 * @param string q
     411 * @param bool super_order_by
     412 * @param string images_where optional aditional restriction on images table
    299413 * @return array
    300414 */
    301 function get_quick_search_results($q)
     415function get_quick_search_results($q, $super_order_by, $images_where='')
    302416{
    303   global $page;
    304   $search_results = array();
     417  $search_results =
     418    array(
     419      'items' => array(),
     420      'qs' => array('q'=>stripslashes($q)),
     421    );
    305422  $q = trim($q);
    306423  if (empty($q))
    307424  {
    308     $search_results['items'] = array();
    309425    return $search_results;
    310426  }
    311   // prepare the big join on images, comments and categories
     427  $q_like_field = '@@__db_field__@@'; //something never in a search
     428  $q_like_clause = get_qsearch_like_clause($q, $q_like_field );
     429
     430
     431  // Step 1 - first we find matches in #images table ===========================
     432  $where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
     433  if (!empty($q_like_clause))
     434  {
     435    $where_clauses .= '
     436    OR '. str_replace($q_like_field, 'file', $q_like_clause);
     437    $where_clauses = '('.$where_clauses.')';
     438  }
     439  $where_clauses = array($where_clauses);
     440  if (!empty($images_where))
     441  {
     442    $where_clauses[]='('.$images_where.')';
     443  }
     444  $where_clauses[] .= get_sql_condition_FandF
     445      (
     446        array( 'visible_images' => 'i.id' ), null, true
     447      );
    312448  $query = '
    313 SELECT
    314   i.id, CAST( CONCAT_WS(" ",
    315     IFNULL(i.name,""),
    316     IFNULL(i.comment,""),
    317     IFNULL(GROUP_CONCAT(DISTINCT co.content),""),
    318     IFNULL(GROUP_CONCAT(DISTINCT c.dir),""),
    319     IFNULL(GROUP_CONCAT(DISTINCT c.name),""),
    320     IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft
    321 FROM (
    322   (
    323     '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id
    324   )
    325     INNER JOIN
    326   '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id
    327   )
    328     INNER JOIN
    329   '.CATEGORIES_TABLE.' c on c.id=ic.category_id
    330 '.get_sql_condition_FandF
    331   (
    332     array
    333       (
    334         'forbidden_categories' => 'category_id',
    335         'visible_categories' => 'category_id',
    336         'visible_images' => 'i.id'
    337       ),
    338     'WHERE'
    339   ).'
    340 GROUP BY i.id';
    341 
    342   $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y
    343 WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
     449SELECT i.id,
     450    MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight
     451  FROM '.IMAGES_TABLE.' i
     452  WHERE '.implode("\n AND ", $where_clauses);
    344453
    345454  $by_weights=array();
     
    347456  while ($row = mysql_fetch_array($result))
    348457  { // weight is important when sorting images by relevance
    349     if ($row['q'])
    350     {
    351       $by_weights[(int)$row['id']] =  2*$row['q'];
    352     }
    353   }
    354 
    355   $permissions_checked = true;
    356   // now search the file name separately (not done in full text because slower
    357   // and the filename in pwg doesn't have spaces so full text is meaningless )
    358   $q_like_clause = get_qsearch_like_clause($q, 'file' );
     458    if ($row['weight'])
     459    {
     460      $by_weights[(int)$row['id']] =  2*$row['weight'];
     461    }
     462    else
     463    {//full text does not match but file name match
     464      $by_weights[(int)$row['id']] =  2;
     465    }
     466  }
     467
     468
     469  // Step 2 - search tags corresponding to the query $q ========================
    359470  if (!empty($q_like_clause))
    360   {
     471  { // search name and url name (without accents)
    361472    $query = '
    362 SELECT id
    363   FROM '.IMAGES_TABLE.'
    364   WHERE '.$q_like_clause.
    365       get_sql_condition_FandF
    366       (
    367         array
    368           (
    369             'visible_images' => 'id'
    370           ),
    371         'AND'
    372       );
    373     $result = pwg_query($query);
    374     while ($row = mysql_fetch_assoc($result))
    375     { // weight is important when sorting images by relevance
    376       $id=(int)$row['id'];
    377       @$by_weights[$id] += 2;
    378       $permissions_checked = false;
    379     }
    380   }
    381 
    382   // now search tag names corresponding to the query $q. we could have searched
    383   // tags earlier during the big join, but for the sake of the performance and
    384   // because tags have only a simple name we do it separately
    385   $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' );
    386   if (!empty($q_like_clause))
    387   {
    388     // search also by url name (without accents)
    389     $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' );
    390     $query = '
    391 SELECT id
     473SELECT id, name, url_name
    392474  FROM '.TAGS_TABLE.'
    393   WHERE '.$q_like_clause.'
    394   OR '.$q_like_clause_url;
    395     $tag_ids = array_from_query( $query, 'id');
    396     if (!empty($tag_ids))
    397     { // we got some tags
     475  WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).'
     476    OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')';
     477    $tags = hash_from_query($query, 'id');
     478    if ( !empty($tags) )
     479    { // we got some tags; get the images
     480      $search_results['qs']['matching_tags']=$tags;
    398481      $query = '
    399 SELECT image_id, COUNT(tag_id) AS q
     482SELECT image_id, COUNT(tag_id) AS weight
    400483  FROM '.IMAGE_TAG_TABLE.'
    401   WHERE tag_id IN ('.implode(',',$tag_ids).')
     484  WHERE tag_id IN ('.implode(',',array_keys($tags)).')
    402485  GROUP BY image_id';
    403486      $result = pwg_query($query);
     
    405488      { // weight is important when sorting images by relevance
    406489        $image_id=(int)$row['image_id'];
    407         @$by_weights[$image_id] += $row['q'];
    408         $permissions_checked = false;
     490        @$by_weights[$image_id] += $row['weight'];
    409491      }
    410492    }
    411493  }
    412494
    413   //at this point, found images might contain images not allowed for the user
    414   if ( !$permissions_checked
    415        and !empty($by_weights)
    416        and !isset($page['super_order_by']) )
    417   {
    418     // before returning the result "as is", make sure the user has the
    419     // permissions for every item
    420     global $conf;
    421     $query = '
     495
     496  // Step 3 - search categories corresponding to the query $q ==================
     497  global $user;
     498  $query = '
     499SELECT id, name, permalink, nb_images
     500  FROM '.CATEGORIES_TABLE.'
     501    INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id
     502  WHERE user_id='.$user['id'].'
     503    AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'.
     504  get_sql_condition_FandF (
     505      array( 'visible_categories' => 'cat_id' ), "\n    AND"
     506    );
     507  $result = pwg_query($query);
     508  while ($row = mysql_fetch_assoc($result))
     509  { // weight is important when sorting images by relevance
     510    if ($row['nb_images']==0)
     511    {
     512      $search_results['qs']['matching_cats_no_images'][] = $row;
     513    }
     514    else
     515    {
     516      $search_results['qs']['matching_cats'][$row['id']] = $row;
     517    }
     518  }
     519
     520  if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
     521  {
     522    return $search_results;
     523  }
     524
     525  // Step 4 - now we have $by_weights ( array image id => weight ) that need
     526  // permission checks and/or matching categories to get images from
     527  $where_clauses = array();
     528  if ( !empty($by_weights) )
     529  {
     530    $where_clauses[]='i.id IN ('
     531      . implode(',', array_keys($by_weights)) . ')';
     532  }
     533  if ( !empty($search_results['qs']['matching_cats']) )
     534  {
     535    $where_clauses[]='category_id IN ('.
     536      implode(',',array_keys($search_results['qs']['matching_cats'])).')';
     537  }
     538  $where_clauses = array( '('.implode("\n    OR ",$where_clauses).')' );
     539  if (!empty($images_where))
     540  {
     541    $where_clauses[]='('.$images_where.')';
     542  }
     543  $where_clauses[] = get_sql_condition_FandF(
     544      array
     545        (
     546          'forbidden_categories' => 'category_id',
     547          'visible_categories' => 'category_id',
     548          'visible_images' => 'i.id'
     549        ),
     550      null,true
     551    );
     552
     553  global $conf;
     554  $query = '
    422555SELECT DISTINCT(id)
    423   FROM '.IMAGES_TABLE.'
     556  FROM '.IMAGES_TABLE.' i
    424557    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
    425   WHERE id IN ('.implode(',', array_keys($by_weights) ).')
    426 '.get_sql_condition_FandF
    427   (
    428     array
    429       (
    430         'forbidden_categories' => 'category_id',
    431         'visible_categories' => 'category_id',
    432         'visible_images' => 'id'
    433       ),
    434     'AND'
    435   ).'
    436   '.$conf['order_by'];
    437     $allowed_images = array_flip( array_from_query( $query, 'id') );
    438     $by_weights = array_intersect_key($by_weights, $allowed_images );
    439     $divisor = 4.0 * count($allowed_images);
    440     // decrease weight from 0 to 0.25 corresponding to the order
    441     foreach ($allowed_images as $id=>$rank )
    442     {
    443       $by_weights[$id] -=  $rank / $divisor;
    444     }
    445     $permissions_checked = true;
    446   }
    447   arsort($by_weights, SORT_NUMERIC);
    448   if ( $permissions_checked )
    449   {
    450     $search_results['as_is']=1;
    451   }
    452  
    453   $search_results['items'] = array_keys($by_weights);
     558  WHERE '.implode("\n AND ", $where_clauses)."\n".
     559  $conf['order_by'];
     560
     561  $allowed_images = array_from_query( $query, 'id');
     562
     563  if ( $super_order_by or empty($by_weights) )
     564  {
     565    $search_results['items'] = $allowed_images;
     566    return $search_results;
     567  }
     568
     569  $allowed_images = array_flip( $allowed_images );
     570  $divisor = 5.0 * count($allowed_images);
     571  foreach ($allowed_images as $id=>$rank )
     572  {
     573    $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1;
     574    $weight -= $rank/$divisor;
     575    $allowed_images[$id] = $weight;
     576  }
     577  arsort($allowed_images, SORT_NUMERIC);
     578  $search_results['items'] = array_keys($allowed_images);
    454579  return $search_results;
    455580}
     
    459584 *
    460585 * @param int search id
     586 * @param string images_where optional aditional restriction on images table
    461587 * @return array
    462588 */
    463 function get_search_results($search_id)
     589function get_search_results($search_id, $super_order_by, $images_where='')
    464590{
    465591  $search = get_search_array($search_id);
    466592  if ( !isset($search['q']) )
    467593  {
    468     $result['items'] = get_regular_search_results($search);
     594    $result['items'] = get_regular_search_results($search, $images_where);
    469595    return $result;
    470596  }
    471597  else
    472598  {
    473     return get_quick_search_results($search['q']);
     599    return get_quick_search_results($search['q'], $super_order_by, $images_where);
    474600  }
    475601}
Note: See TracChangeset for help on using the changeset viewer.