source: trunk/include/functions_search.inc.php @ 2043

Last change on this file since 2043 was 2043, checked in by rvelices, 17 years ago

quick search results improvement: elements with the same relevance are ordered using the global order_bye

  • Property svn:eol-style set to LF
  • Property svn:keywords set to Author Date Id Revision
File size: 12.5 KB
Line 
1<?php
2// +-----------------------------------------------------------------------+
3// | PhpWebGallery - a PHP based picture gallery                           |
4// | Copyright (C) 2002-2003 Pierrick LE GALL - pierrick@phpwebgallery.net |
5// | Copyright (C) 2003-2007 PhpWebGallery Team - http://phpwebgallery.net |
6// +-----------------------------------------------------------------------+
7// | file          : $Id: functions_search.inc.php 2043 2007-06-23 19:40:38Z rvelices $
8// | last update   : $Date: 2007-06-23 19:40:38 +0000 (Sat, 23 Jun 2007) $
9// | last modifier : $Author: rvelices $
10// | revision      : $Revision: 2043 $
11// +-----------------------------------------------------------------------+
12// | This program is free software; you can redistribute it and/or modify  |
13// | it under the terms of the GNU General Public License as published by  |
14// | the Free Software Foundation                                          |
15// |                                                                       |
16// | This program is distributed in the hope that it will be useful, but   |
17// | WITHOUT ANY WARRANTY; without even the implied warranty of            |
18// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU      |
19// | General Public License for more details.                              |
20// |                                                                       |
21// | You should have received a copy of the GNU General Public License     |
22// | along with this program; if not, write to the Free Software           |
23// | Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, |
24// | USA.                                                                  |
25// +-----------------------------------------------------------------------+
26
27
28/**
29 * returns search rules stored into a serialized array in "search"
30 * table. Each search rules set is numericaly identified.
31 *
32 * @param int search_id
33 * @return array
34 */
35function get_search_array($search_id)
36{
37  if (!is_numeric($search_id))
38  {
39    die('Search id must be an integer');
40  }
41
42  $query = '
43SELECT rules
44  FROM '.SEARCH_TABLE.'
45  WHERE id = '.$search_id.'
46;';
47  list($serialized_rules) = mysql_fetch_row(pwg_query($query));
48
49  return unserialize($serialized_rules);
50}
51
52/**
53 * returns the SQL clause from a search identifier
54 *
55 * Search rules are stored in search table as a serialized array. This array
56 * need to be transformed into an SQL clause to be used in queries.
57 *
58 * @param array search
59 * @return string
60 */
61function get_sql_search_clause($search)
62{
63  // SQL where clauses are stored in $clauses array during query
64  // construction
65  $clauses = array();
66
67  foreach (array('file','name','comment','author') as $textfield)
68  {
69    if (isset($search['fields'][$textfield]))
70    {
71      $local_clauses = array();
72      foreach ($search['fields'][$textfield]['words'] as $word)
73      {
74        array_push($local_clauses, $textfield." LIKE '%".$word."%'");
75      }
76
77      // adds brackets around where clauses
78      $local_clauses = prepend_append_array_items($local_clauses, '(', ')');
79
80      array_push(
81        $clauses,
82        implode(
83          ' '.$search['fields'][$textfield]['mode'].' ',
84          $local_clauses
85          )
86        );
87    }
88  }
89
90  if (isset($search['fields']['allwords']))
91  {
92    $fields = array('file', 'name', 'comment', 'author');
93    // in the OR mode, request bust be :
94    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
95    // OR (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
96    //
97    // in the AND mode :
98    // ((field1 LIKE '%word1%' OR field2 LIKE '%word1%')
99    // AND (field1 LIKE '%word2%' OR field2 LIKE '%word2%'))
100    $word_clauses = array();
101    foreach ($search['fields']['allwords']['words'] as $word)
102    {
103      $field_clauses = array();
104      foreach ($fields as $field)
105      {
106        array_push($field_clauses, $field." LIKE '%".$word."%'");
107      }
108      // adds brackets around where clauses
109      array_push(
110        $word_clauses,
111        implode(
112          "\n          OR ",
113          $field_clauses
114          )
115        );
116    }
117
118    array_walk(
119      $word_clauses,
120      create_function('&$s','$s="(".$s.")";')
121      );
122
123    array_push(
124      $clauses,
125      "\n         ".
126      implode(
127        "\n         ".
128              $search['fields']['allwords']['mode'].
129        "\n         ",
130        $word_clauses
131        )
132      );
133  }
134
135  foreach (array('date_available', 'date_creation') as $datefield)
136  {
137    if (isset($search['fields'][$datefield]))
138    {
139      array_push(
140        $clauses,
141        $datefield." = '".$search['fields'][$datefield]['date']."'"
142        );
143    }
144
145    foreach (array('after','before') as $suffix)
146    {
147      $key = $datefield.'-'.$suffix;
148
149      if (isset($search['fields'][$key]))
150      {
151        array_push(
152          $clauses,
153
154          $datefield.
155          ($suffix == 'after'             ? ' >' : ' <').
156          ($search['fields'][$key]['inc'] ? '='  : '').
157          " '".$search['fields'][$key]['date']."'"
158
159          );
160      }
161    }
162  }
163
164  if (isset($search['fields']['cat']))
165  {
166    if ($search['fields']['cat']['sub_inc'])
167    {
168      // searching all the categories id of sub-categories
169      $cat_ids = get_subcat_ids($search['fields']['cat']['words']);
170    }
171    else
172    {
173      $cat_ids = $search['fields']['cat']['words'];
174    }
175
176    $local_clause = 'category_id IN ('.implode(',', $cat_ids).')';
177    array_push($clauses, $local_clause);
178  }
179
180  // adds brackets around where clauses
181  $clauses = prepend_append_array_items($clauses, '(', ')');
182
183  $where_separator =
184    implode(
185      "\n    ".$search['mode'].' ',
186      $clauses
187      );
188
189  $search_clause = $where_separator;
190
191  return $search_clause;
192}
193
194/**
195 * returns the list of items corresponding to the advanced search array
196 *
197 * @param array search
198 * @return array
199 */
200function get_regular_search_results($search)
201{
202  $items = array();
203
204  $search_clause = get_sql_search_clause($search);
205
206  if (!empty($search_clause))
207  {
208    $query = '
209SELECT DISTINCT(id)
210  FROM '.IMAGES_TABLE.'
211    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
212  WHERE '.$search_clause.'
213;';
214    $items = array_from_query($query, 'id');
215  }
216
217  if (isset($search['fields']['tags']))
218  {
219    $tag_items = get_image_ids_for_tags(
220      $search['fields']['tags']['words'],
221      $search['fields']['tags']['mode']
222      );
223
224    switch ($search['mode'])
225    {
226      case 'AND':
227      {
228        if (empty($search_clause))
229        {
230          $items = $tag_items;
231        }
232        else
233        {
234          $items = array_intersect($items, $tag_items);
235        }
236        break;
237      }
238      case 'OR':
239      {
240        $items = array_unique(
241          array_merge(
242            $items,
243            $tag_items
244            )
245          );
246        break;
247      }
248    }
249  }
250
251  return $items;
252}
253
254/**
255 * returns the LIKE sql clause corresponding to the quick search query $q
256 * and the field $field. example q="john bill", field="file" will return
257 * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql
258 * full text search (+,<,>) are omitted.
259 * @param string q
260 * @param string field
261 * @return string
262 */
263function get_qsearch_like_clause($q, $field)
264{
265  $tokens = preg_split('/[\s,.;!\?]+/', $q);
266  for ($i=0; $i<count($tokens); $i++)
267  {
268    $tokens[$i]=str_replace('*','%', $tokens[$i]);
269    if (preg_match('/^[+<>]/',$tokens[$i]) )
270      $tokens[$i]=substr($tokens[$i], 1);
271    else if (substr($tokens[$i], 0, 1)=='-')
272    {
273      unset($tokens[$i]);
274      $i--;
275    }
276  }
277
278  if (!empty($tokens))
279  {
280    $query = '(';
281    for ($i=0; $i<count($tokens); $i++)
282    {
283      if ($i>0) $query .= 'OR ';
284      $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" ';
285    }
286    $query .= ')';
287    return $query;
288  }
289  return null;
290}
291
292
293/**
294 * returns the search results (array of image ids) corresponding to a
295 * quick/query search. A quick/query search returns many items (search is
296 * not strict), but results are sorted by relevance.
297 *
298 * @param string q
299 * @return array
300 */
301function get_quick_search_results($q)
302{
303  global $page;
304  $search_results = array();
305  $q = trim($q);
306  if (empty($q))
307  {
308    $search_results['items'] = array();
309    return $search_results;
310  }
311  // prepare the big join on images, comments and categories
312  $query = '
313SELECT
314  i.id, CAST( CONCAT_WS(" ",
315    IFNULL(i.name,""),
316    IFNULL(i.comment,""),
317    IFNULL(GROUP_CONCAT(DISTINCT co.content),""),
318    IFNULL(GROUP_CONCAT(DISTINCT c.dir),""),
319    IFNULL(GROUP_CONCAT(DISTINCT c.name),""),
320    IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft
321FROM (
322  (
323    '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id
324  )
325    INNER JOIN
326  '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id
327  )
328    INNER JOIN
329  '.CATEGORIES_TABLE.' c on c.id=ic.category_id
330'.get_sql_condition_FandF
331  (
332    array
333      (
334        'forbidden_categories' => 'category_id',
335        'visible_categories' => 'category_id',
336        'visible_images' => 'i.id'
337      ),
338    'WHERE'
339  ).'
340GROUP BY i.id';
341
342  $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y
343WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)';
344
345  $by_weights=array();
346  $result = pwg_query($query);
347  while ($row = mysql_fetch_array($result))
348  { // weight is important when sorting images by relevance
349    if ($row['q'])
350    {
351      $by_weights[(int)$row['id']] =  2*$row['q'];
352    }
353  }
354
355  $permissions_checked = true;
356  // now search the file name separately (not done in full text because slower
357  // and the filename in pwg doesn't have spaces so full text is meaningless )
358  $q_like_clause = get_qsearch_like_clause($q, 'file' );
359  if (!empty($q_like_clause))
360  {
361    $query = '
362SELECT id
363  FROM '.IMAGES_TABLE.'
364  WHERE '.$q_like_clause.
365      get_sql_condition_FandF
366      (
367        array
368          (
369            'visible_images' => 'id'
370          ),
371        'AND'
372      );
373    $result = pwg_query($query);
374    while ($row = mysql_fetch_assoc($result))
375    { // weight is important when sorting images by relevance
376      $id=(int)$row['id'];
377      @$by_weights[$id] += 2;
378      $permissions_checked = false;
379    }
380  }
381
382  // now search tag names corresponding to the query $q. we could have searched
383  // tags earlier during the big join, but for the sake of the performance and
384  // because tags have only a simple name we do it separately
385  $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' );
386  if (!empty($q_like_clause))
387  {
388    // search also by url name (without accents)
389    $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' );
390    $query = '
391SELECT id
392  FROM '.TAGS_TABLE.'
393  WHERE '.$q_like_clause.'
394  OR '.$q_like_clause_url;
395    $tag_ids = array_from_query( $query, 'id');
396    if (!empty($tag_ids))
397    { // we got some tags
398      $query = '
399SELECT image_id, COUNT(tag_id) AS q
400  FROM '.IMAGE_TAG_TABLE.'
401  WHERE tag_id IN ('.implode(',',$tag_ids).')
402  GROUP BY image_id';
403      $result = pwg_query($query);
404      while ($row = mysql_fetch_assoc($result))
405      { // weight is important when sorting images by relevance
406        $image_id=(int)$row['image_id'];
407        @$by_weights[$image_id] += $row['q'];
408        $permissions_checked = false;
409      }
410    }
411  }
412
413  //at this point, found images might contain images not allowed for the user
414  if ( !$permissions_checked
415       and !empty($by_weights)
416       and !isset($page['super_order_by']) )
417  {
418    // before returning the result "as is", make sure the user has the
419    // permissions for every item
420    global $conf;
421    $query = '
422SELECT DISTINCT(id)
423  FROM '.IMAGES_TABLE.'
424    INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id
425  WHERE id IN ('.implode(',', array_keys($by_weights) ).')
426'.get_sql_condition_FandF
427  (
428    array
429      (
430        'forbidden_categories' => 'category_id',
431        'visible_categories' => 'category_id',
432        'visible_images' => 'id'
433      ),
434    'AND'
435  ).'
436  '.$conf['order_by'];
437    $allowed_images = array_flip( array_from_query( $query, 'id') );
438    $by_weights = array_intersect_key($by_weights, $allowed_images );
439    $divisor = 4.0 * count($allowed_images);
440    // decrease weight from 0 to 0.25 corresponding to the order
441    foreach ($allowed_images as $id=>$rank )
442    {
443      $by_weights[$id] -=  $rank / $divisor;
444    }
445    $permissions_checked = true;
446  }
447  arsort($by_weights, SORT_NUMERIC);
448  if ( $permissions_checked )
449  {
450    $search_results['as_is']=1;
451  }
452 
453  $search_results['items'] = array_keys($by_weights);
454  return $search_results;
455}
456
457/**
458 * returns an array of 'items' corresponding to the search id
459 *
460 * @param int search id
461 * @return array
462 */
463function get_search_results($search_id)
464{
465  $search = get_search_array($search_id);
466  if ( !isset($search['q']) )
467  {
468    $result['items'] = get_regular_search_results($search);
469    return $result;
470  }
471  else
472  {
473    return get_quick_search_results($search['q']);
474  }
475}
476?>
Note: See TracBrowser for help on using the repository browser.