Ignore:
Timestamp:
Oct 12, 2007, 5:27:34 AM (17 years ago)
Author:
rvelices
Message:
  • fix plugin menu link broken with xamp (realpath behaves differently)
  • complete quick search rewriting
    • now we can quote phrases as in google "New York" is not the same as New York
    • user comments not searched anymore (faster)
    • the big full text query does not use joins anymore (faster)
    • related tags not shown on the index page, but now you can see the matching tags and matching categories
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/functions_search.inc.php

    r2043 r2135  
    254254/**
    255255 * returns the LIKE sql clause corresponding to the quick search query $q
    256  * and the field $field. example q="john bill", field="file" will return
    257  * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql
    258  * full text search (+,<,>) are omitted.
     256 * and the field $field. example q='john bill', field='file' will return
     257 * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full
     258 * text search (+,<,>,~) are omitted. The query can contain a phrase:
     259 * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'.
    259260 * @param string q
    260261 * @param string field
     
    263264function get_qsearch_like_clause($q, $field)
    264265{
    265   $tokens = preg_split('/[\s,.;!\?]+/', $q);
     266  $q = stripslashes($q);
     267  $tokens = array();
     268  $token_modifiers = array();
     269  $crt_token = "";
     270  $crt_token_modifier = "";
     271  $state = 0;
     272
     273  for ($i=0; $i<strlen($q); $i++)
     274  {
     275    $ch = $q[$i];
     276    switch ($state)
     277    {
     278      case 0:
     279        if ($ch=='"')
     280        {
     281          if (strlen($crt_token))
     282          {
     283            $tokens[] = $crt_token;
     284            $token_modifiers[] = $crt_token_modifier;
     285            $crt_token = "";
     286            $crt_token_modifier = "";
     287          }
     288          $state=1;
     289        }
     290        elseif ( $ch=='*' )
     291        { // wild card
     292          $crt_token .= '%';
     293        }
     294        elseif ( strcspn($ch, '+-><~')==0 )
     295        { //special full text modifier
     296          if (strlen($crt_token))
     297          {
     298            $tokens[] = $crt_token;
     299            $token_modifiers[] = $crt_token_modifier;
     300            $crt_token = "";
     301            $crt_token_modifier = "";
     302          }
     303          $crt_token_modifier .= $ch;
     304        }
     305        elseif (preg_match('/[\s,.;!\?]+/', $ch))
     306        { // white space
     307          if (strlen($crt_token))
     308          {
     309            $tokens[] = $crt_token;
     310            $token_modifiers[] = $crt_token_modifier;
     311            $crt_token = "";
     312            $crt_token_modifier = "";
     313          }
     314        }
     315        else
     316        {
     317          $crt_token .= $ch;
     318        }
     319        break;
     320      case 1: // qualified with quotes
     321        switch ($ch)
     322        {
     323          case '"':
     324            $tokens[] = $crt_token;
     325            $token_modifiers[] = $crt_token_modifier;
     326            $crt_token = "";
     327            $crt_token_modifier = "";
     328            $state=0;
     329            break;
     330          default:
     331            $crt_token .= $ch;
     332        }
     333        break;
     334    }
     335  }
     336  if (strlen($crt_token))
     337  {
     338    $tokens[] = $crt_token;
     339    $token_modifiers[] = $crt_token_modifier;
     340  }
     341
     342  $clauses = array();
    266343  for ($i=0; $i<count($tokens); $i++)
    267344  {
    268     $tokens[$i]=str_replace('*','%', $tokens[$i]);
    269     if (preg_match('/^[+<>]/',$tokens[$i]) )
    270       $tokens[$i]=substr($tokens[$i], 1);
    271     else if (substr($tokens[$i], 0, 1)=='-')
    272     {
    273       unset($tokens[$i]);
    274       $i--;
    275     }
    276   }
    277 
    278   if (!empty($tokens))
    279   {
    280     $query = '(';
    281     for ($i=0; $i<count($tokens); $i++)
    282     {
    283       if ($i>0) $query .= 'OR ';
    284       $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" ';
    285     }
    286     $query .= ')';
    287     return $query;
    288   }
    289   return null;
     345    $tokens[$i] = trim($tokens[$i], '%');
     346    if (strstr($token_modifiers[$i], '-')!==false)
     347      continue;
     348    if ( strlen($tokens[$i])==0)
     349      continue;
     350    $clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"';
     351  }
     352
     353  return count($clauses) ? '('.implode(' OR ', $clauses).')' : null;
    290354}
    291355
    292356
    293357/**
    294  * returns the search results (array of image ids) corresponding to a
    295  * quick/query search. A quick/query search returns many items (search is
    296  * not strict), but results are sorted by relevance.
     358 * returns the search results corresponding to a quick/query search.
     359 * A quick/query search returns many items (search is not strict), but results
     360 * are sorted by relevance unless $page['super_order_by'] is set. Returns:
     361 * array (
     362 * 'items' => array(85,68,79...)
     363 * 'as_is' => 1 (indicates the caller that items are ordered and permissions checked
     364 * 'qs'    => array(
     365 *    'matching_tags' => array(85,86) - matching tags
     366 *    'matching_cats' => array(1,2,3) - matching categories
     367 *    'matching_cats_no_images' =>array(99) - matching categories without images
     368 *      ))
    297369 *
    298370 * @param string q
     371 * @param string images_where optional aditional restriction on images table
    299372 * @return array
    300373 */
    301 function get_quick_search_results($q)
     374function get_quick_search_results($q, $images_where='')
    302375{
    303376  global $page;
    304   $search_results = array();
     377  $search_results =
     378    array(
     379      'items' => array(),
     380      'as_is' => 1,
     381      'qs' => array('q'=>stripslashes($q)),
     382    );
    305383  $q = trim($q);
    306384  if (empty($q))
    307385  {
    308     $search_results['items'] = array();
    309386    return $search_results;
    310387  }
    311   // prepare the big join on images, comments and categories
     388  $q_like_field = '@@__db_field__@@'; //something never in a search
     389  $q_like_clause = get_qsearch_like_clause($q, $q_like_field );
     390
     391
     392  // Step 1 - first we find matches in #images table ===========================
     393  $where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
     394  if (!empty($q_like_clause))
     395  {
     396    $where_clauses .= '
     397    OR '. str_replace($q_like_field, 'file', $q_like_clause);
     398    $where_clauses = '('.$where_clauses.')';
     399  }
     400  $where_clauses = array($where_clauses);
     401  if (!empty($images_where))
     402  {
     403    $where_clauses[]='('.$images_where.')';
     404  }
     405  $where_clauses[] .= get_sql_condition_FandF
     406      (
     407        array( 'visible_images' => 'i.id' ), null, true
     408      );
    312409  $query = '
    313 SELECT
    314   i.id, CAST( CONCAT_WS(" ",
    315     IFNULL(i.name,""),
    316     IFNULL(i.comment,""),
    317     IFNULL(GROUP_CONCAT(DISTINCT co.content),""),
    318     IFNULL(GROUP_CONCAT(DISTINCT c.dir),""),
    319     IFNULL(GROUP_CONCAT(DISTINCT c.name),""),
    320     IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft
    321 FROM (
    322   (
    323     '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id
    324   )
    325     INNER JOIN
    326   '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id
    327   )
    328     INNER JOIN
    329   '.CATEGORIES_TABLE.' c on c.id=ic.category_id
    330 '.get_sql_condition_FandF
    331   (
    332     array
    333       (
    334         'forbidden_categories' => 'category_id',
    335         'visible_categories' => 'category_id',
    336         'visible_images' => 'i.id'
    337       ),
    338     'WHERE'
    339   ).'
    340 GROUP BY i.id';
    341 
    342   $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y
    343 WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
     410SELECT i.id,
     411    MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight
     412  FROM '.IMAGES_TABLE.' i
     413  WHERE '.implode("\n AND ", $where_clauses);
    344414
    345415  $by_weights=array();
     
    347417  while ($row = mysql_fetch_array($result))
    348418  { // weight is important when sorting images by relevance
    349     if ($row['q'])
    350     {
    351       $by_weights[(int)$row['id']] =  2*$row['q'];
    352     }
    353   }
    354 
    355   $permissions_checked = true;
    356   // now search the file name separately (not done in full text because slower
    357   // and the filename in pwg doesn't have spaces so full text is meaningless )
    358   $q_like_clause = get_qsearch_like_clause($q, 'file' );
     419    if ($row['weight'])
     420    {
     421      $by_weights[(int)$row['id']] =  2*$row['weight'];
     422    }
     423    else
     424    {//full text does not match but file name match
     425      $by_weights[(int)$row['id']] =  2;
     426    }
     427  }
     428
     429
     430  // Step 2 - search tags corresponding to the query $q ========================
    359431  if (!empty($q_like_clause))
    360   {
    361     $query = '
    362 SELECT id
    363   FROM '.IMAGES_TABLE.'
    364   WHERE '.$q_like_clause.
    365       get_sql_condition_FandF
    366       (
    367         array
    368           (
    369             'visible_images' => 'id'
    370           ),
    371         'AND'
    372       );
    373     $result = pwg_query($query);
    374     while ($row = mysql_fetch_assoc($result))
    375     { // weight is important when sorting images by relevance
    376       $id=(int)$row['id'];
    377       @$by_weights[$id] += 2;
    378       $permissions_checked = false;
    379     }
    380   }
    381 
    382   // now search tag names corresponding to the query $q. we could have searched
    383   // tags earlier during the big join, but for the sake of the performance and
    384   // because tags have only a simple name we do it separately
    385   $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' );
    386   if (!empty($q_like_clause))
    387   {
    388     // search also by url name (without accents)
    389     $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' );
     432  { // search name and url name (without accents)
    390433    $query = '
    391434SELECT id
    392435  FROM '.TAGS_TABLE.'
    393   WHERE '.$q_like_clause.'
    394   OR '.$q_like_clause_url;
     436  WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).'
     437    OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')';
    395438    $tag_ids = array_from_query( $query, 'id');
    396439    if (!empty($tag_ids))
    397     { // we got some tags
     440    { // we got some tags; get the images
     441      $search_results['qs']['matching_tags']=$tag_ids;
    398442      $query = '
    399 SELECT image_id, COUNT(tag_id) AS q
     443SELECT image_id, COUNT(tag_id) AS weight
    400444  FROM '.IMAGE_TAG_TABLE.'
    401445  WHERE tag_id IN ('.implode(',',$tag_ids).')
     
    405449      { // weight is important when sorting images by relevance
    406450        $image_id=(int)$row['image_id'];
    407         @$by_weights[$image_id] += $row['q'];
    408         $permissions_checked = false;
     451        @$by_weights[$image_id] += $row['weight'];
    409452      }
    410453    }
    411454  }
    412455
    413   //at this point, found images might contain images not allowed for the user
    414   if ( !$permissions_checked
    415        and !empty($by_weights)
    416        and !isset($page['super_order_by']) )
    417   {
    418     // before returning the result "as is", make sure the user has the
    419     // permissions for every item
    420     global $conf;
    421     $query = '
     456
     457  // Step 3 - search categories corresponding to the query $q ==================
     458  global $user;
     459  $query = '
     460SELECT id, nb_images
     461  FROM '.CATEGORIES_TABLE.'
     462    INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id
     463  WHERE user_id='.$user['id'].'
     464    AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'.
     465  get_sql_condition_FandF (
     466      array( 'visible_categories' => 'cat_id' ), "\n    AND"
     467    );
     468  $result = pwg_query($query);
     469  while ($row = mysql_fetch_assoc($result))
     470  { // weight is important when sorting images by relevance
     471    if ($row['nb_images']==0)
     472    {
     473      $search_results['qs']['matching_cats_no_images'][] = $row['id'];
     474    }
     475    else
     476    {
     477      $search_results['qs']['matching_cats'][] = $row['id'];
     478    }
     479  }
     480
     481  if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) )
     482  {
     483    return $search_results;
     484  }
     485
     486  // Step 4 - now we have $by_weights ( array image id => weight ) that need
     487  // permission checks and/or matching categories to get images from
     488  $where_clauses = array();
     489  if ( !empty($by_weights) )
     490  {
     491    $where_clauses[]='i.id IN ('
     492      . implode(',', array_keys($by_weights)) . ')';
     493  }
     494  if ( !empty($search_results['qs']['matching_cats']) )
     495  {
     496    $where_clauses[]='category_id IN ('.
     497      implode(',',$search_results['qs']['matching_cats']).')';
     498  }
     499  $where_clauses = array( '('.implode("\n    OR ",$where_clauses).')' );
     500  if (!empty($images_where))
     501  {
     502    $where_clauses[]='('.$images_where.')';
     503  }
     504  $where_clauses[] = get_sql_condition_FandF(
     505      array
     506        (
     507          'forbidden_categories' => 'category_id',
     508          'visible_categories' => 'category_id',
     509          'visible_images' => 'i.id'
     510        ),
     511      null,true
     512    );
     513
     514  global $conf;
     515  $query = '
    422516SELECT DISTINCT(id)
    423   FROM '.IMAGES_TABLE.'
     517  FROM '.IMAGES_TABLE.' i
    424518    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
    425   WHERE id IN ('.implode(',', array_keys($by_weights) ).')
    426 '.get_sql_condition_FandF
    427   (
    428     array
    429       (
    430         'forbidden_categories' => 'category_id',
    431         'visible_categories' => 'category_id',
    432         'visible_images' => 'id'
    433       ),
    434     'AND'
    435   ).'
    436   '.$conf['order_by'];
    437     $allowed_images = array_flip( array_from_query( $query, 'id') );
    438     $by_weights = array_intersect_key($by_weights, $allowed_images );
    439     $divisor = 4.0 * count($allowed_images);
    440     // decrease weight from 0 to 0.25 corresponding to the order
    441     foreach ($allowed_images as $id=>$rank )
    442     {
    443       $by_weights[$id] -=  $rank / $divisor;
    444     }
    445     $permissions_checked = true;
    446   }
    447   arsort($by_weights, SORT_NUMERIC);
    448   if ( $permissions_checked )
    449   {
    450     $search_results['as_is']=1;
    451   }
    452  
    453   $search_results['items'] = array_keys($by_weights);
     519  WHERE '.implode("\n AND ", $where_clauses)."\n".
     520  $conf['order_by'];
     521
     522  $allowed_images = array_from_query( $query, 'id');
     523
     524  if ( isset($page['super_order_by']) or empty($by_weights) )
     525  {
     526    $search_results['items'] = $allowed_images;
     527    return $search_results;
     528  }
     529
     530  $allowed_images = array_flip( $allowed_images );
     531  $divisor = 5.0 * count($allowed_images);
     532  foreach ($allowed_images as $id=>$rank )
     533  {
     534    $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1;
     535    $weight -= $rank/$divisor;
     536    $allowed_images[$id] = $weight;
     537  }
     538  arsort($allowed_images, SORT_NUMERIC);
     539  $search_results['items'] = array_keys($allowed_images);
    454540  return $search_results;
    455541}
     
    459545 *
    460546 * @param int search id
     547 * @param string images_where optional aditional restriction on images table
    461548 * @return array
    462549 */
    463 function get_search_results($search_id)
     550function get_search_results($search_id, $images_where='')
    464551{
    465552  $search = get_search_array($search_id);
     
    471558  else
    472559  {
    473     return get_quick_search_results($search['q']);
     560    return get_quick_search_results($search['q'], $images_where);
    474561  }
    475562}
Note: See TracChangeset for help on using the changeset viewer.