Changeset 2135 for trunk/include/functions_search.inc.php
- Timestamp:
- Oct 12, 2007, 5:27:34 AM (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/functions_search.inc.php
r2043 r2135 254 254 /** 255 255 * returns the LIKE sql clause corresponding to the quick search query $q 256 * and the field $field. example q="john bill", field="file" will return 257 * file LIKE "%john%" OR file LIKE "%bill%". Special characters for MySql 258 * full text search (+,<,>) are omitted. 256 * and the field $field. example q='john bill', field='file' will return 257 * file LIKE '%john%' OR file LIKE '%bill%'. Special characters for MySql full 258 * text search (+,<,>,~) are omitted. The query can contain a phrase: 259 * 'Pierre "New York"' will return LIKE '%Pierre%' OR LIKE '%New York%'. 259 260 * @param string q 260 261 * @param string field … … 263 264 function get_qsearch_like_clause($q, $field) 264 265 { 265 $tokens = preg_split('/[\s,.;!\?]+/', $q); 266 $q = stripslashes($q); 267 $tokens = array(); 268 $token_modifiers = array(); 269 $crt_token = ""; 270 $crt_token_modifier = ""; 271 $state = 0; 272 273 for ($i=0; $i<strlen($q); $i++) 274 { 275 $ch = $q[$i]; 276 switch ($state) 277 { 278 case 0: 279 if ($ch=='"') 280 { 281 if (strlen($crt_token)) 282 { 283 $tokens[] = $crt_token; 284 $token_modifiers[] = $crt_token_modifier; 285 $crt_token = ""; 286 $crt_token_modifier = ""; 287 } 288 $state=1; 289 } 290 elseif ( $ch=='*' ) 291 { // wild card 292 $crt_token .= '%'; 293 } 294 elseif ( strcspn($ch, '+-><~')==0 ) 295 { //special full text modifier 296 if (strlen($crt_token)) 297 { 298 $tokens[] = $crt_token; 299 $token_modifiers[] = $crt_token_modifier; 300 $crt_token = ""; 301 $crt_token_modifier = ""; 302 } 303 $crt_token_modifier .= $ch; 304 } 305 elseif (preg_match('/[\s,.;!\?]+/', $ch)) 306 { // white space 307 if (strlen($crt_token)) 308 { 309 $tokens[] = $crt_token; 310 $token_modifiers[] = $crt_token_modifier; 311 $crt_token = ""; 312 $crt_token_modifier = ""; 313 } 314 } 315 else 316 { 317 $crt_token .= $ch; 318 } 319 break; 320 case 1: // qualified with quotes 321 switch ($ch) 322 { 323 case '"': 324 $tokens[] = $crt_token; 325 $token_modifiers[] = $crt_token_modifier; 326 $crt_token = ""; 327 $crt_token_modifier = ""; 328 $state=0; 329 break; 330 default: 331 $crt_token .= $ch; 332 } 333 break; 334 } 335 } 336 if (strlen($crt_token)) 337 { 338 $tokens[] = $crt_token; 339 $token_modifiers[] = $crt_token_modifier; 340 } 341 342 $clauses = array(); 266 343 for ($i=0; $i<count($tokens); $i++) 267 344 { 268 $tokens[$i]=str_replace('*','%', $tokens[$i]); 269 if (preg_match('/^[+<>]/',$tokens[$i]) ) 270 $tokens[$i]=substr($tokens[$i], 1); 271 else if (substr($tokens[$i], 0, 1)=='-') 272 { 273 unset($tokens[$i]); 274 $i--; 275 } 276 } 277 278 if (!empty($tokens)) 279 { 280 $query = '('; 281 for ($i=0; $i<count($tokens); $i++) 282 { 283 if ($i>0) $query .= 'OR '; 284 $query .= ' '.$field.' LIKE "%'.$tokens[$i].'%" '; 285 } 286 $query .= ')'; 287 return $query; 288 } 289 return null; 345 $tokens[$i] = trim($tokens[$i], '%'); 346 if (strstr($token_modifiers[$i], '-')!==false) 347 continue; 348 if ( strlen($tokens[$i])==0) 349 continue; 350 $clauses[] = $field.' LIKE "%'.addslashes($tokens[$i]).'%"'; 351 } 352 353 return count($clauses) ? '('.implode(' OR ', $clauses).')' : null; 290 354 } 291 355 292 356 293 357 /** 294 * returns the search results (array of image ids) corresponding to a 295 * quick/query search. A quick/query search returns many items (search is 296 * not strict), but results are sorted by relevance. 358 * returns the search results corresponding to a quick/query search. 359 * A quick/query search returns many items (search is not strict), but results 360 * are sorted by relevance unless $page['super_order_by'] is set. Returns: 361 * array ( 362 * 'items' => array(85,68,79...) 363 * 'as_is' => 1 (indicates the caller that items are ordered and permissions checked 364 * 'qs' => array( 365 * 'matching_tags' => array(85,86) - matching tags 366 * 'matching_cats' => array(1,2,3) - matching categories 367 * 'matching_cats_no_images' =>array(99) - matching categories without images 368 * )) 297 369 * 298 370 * @param string q 371 * @param string images_where optional aditional restriction on images table 299 372 * @return array 300 373 */ 301 function get_quick_search_results($q )374 function get_quick_search_results($q, $images_where='') 302 375 { 303 376 global $page; 304 $search_results = array(); 377 $search_results = 378 array( 379 'items' => array(), 380 'as_is' => 1, 381 'qs' => array('q'=>stripslashes($q)), 382 ); 305 383 $q = trim($q); 306 384 if (empty($q)) 307 385 { 308 $search_results['items'] = array();309 386 return $search_results; 310 387 } 311 // prepare the big join on images, comments and categories 388 $q_like_field = '@@__db_field__@@'; //something never in a search 389 $q_like_clause = get_qsearch_like_clause($q, $q_like_field ); 390 391 392 // Step 1 - first we find matches in #images table =========================== 393 $where_clauses='MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; 394 if (!empty($q_like_clause)) 395 { 396 $where_clauses .= ' 397 OR '. str_replace($q_like_field, 'file', $q_like_clause); 398 $where_clauses = '('.$where_clauses.')'; 399 } 400 $where_clauses = array($where_clauses); 401 if (!empty($images_where)) 402 { 403 $where_clauses[]='('.$images_where.')'; 404 } 405 $where_clauses[] .= get_sql_condition_FandF 406 ( 407 array( 'visible_images' => 'i.id' ), null, true 408 ); 312 409 $query = ' 313 SELECT 314 i.id, CAST( CONCAT_WS(" ", 315 IFNULL(i.name,""), 316 IFNULL(i.comment,""), 317 IFNULL(GROUP_CONCAT(DISTINCT co.content),""), 318 IFNULL(GROUP_CONCAT(DISTINCT c.dir),""), 319 IFNULL(GROUP_CONCAT(DISTINCT c.name),""), 320 IFNULL(GROUP_CONCAT(DISTINCT c.comment),"") ) AS CHAR) AS ft 321 FROM ( 322 ( 323 '.IMAGES_TABLE.' i LEFT JOIN '.COMMENTS_TABLE.' co on i.id=co.image_id 324 ) 325 INNER JOIN 326 '.IMAGE_CATEGORY_TABLE.' ic on ic.image_id=i.id 327 ) 328 INNER JOIN 329 '.CATEGORIES_TABLE.' c on c.id=ic.category_id 330 '.get_sql_condition_FandF 331 ( 332 array 333 ( 334 'forbidden_categories' => 'category_id', 335 'visible_categories' => 'category_id', 336 'visible_images' => 'i.id' 337 ), 338 'WHERE' 339 ).' 340 GROUP BY i.id'; 341 342 $query = 'SELECT id, MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS q FROM ('.$query.') AS Y 343 WHERE MATCH(ft) AGAINST( "'.$q.'" IN BOOLEAN MODE)'; 410 SELECT i.id, 411 MATCH(i.name, i.comment) AGAINST( "'.$q.'" IN BOOLEAN MODE) AS weight 412 FROM '.IMAGES_TABLE.' i 413 WHERE '.implode("\n AND ", $where_clauses); 344 414 345 415 $by_weights=array(); … … 347 417 while ($row = mysql_fetch_array($result)) 348 418 { // weight is important when sorting images by relevance 349 if ($row['q']) 350 { 351 $by_weights[(int)$row['id']] = 2*$row['q']; 352 } 353 } 354 355 $permissions_checked = true; 356 // now search the file name separately (not done in full text because slower 357 // and the filename in pwg doesn't have spaces so full text is meaningless ) 358 $q_like_clause = get_qsearch_like_clause($q, 'file' ); 419 if ($row['weight']) 420 { 421 $by_weights[(int)$row['id']] = 2*$row['weight']; 422 } 423 else 424 {//full text does not match but file name match 425 $by_weights[(int)$row['id']] = 2; 426 } 427 } 428 429 430 // Step 2 - search tags corresponding to the query $q ======================== 359 431 if (!empty($q_like_clause)) 360 { 361 $query = ' 362 SELECT id 363 FROM '.IMAGES_TABLE.' 364 WHERE '.$q_like_clause. 365 get_sql_condition_FandF 366 ( 367 array 368 ( 369 'visible_images' => 'id' 370 ), 371 'AND' 372 ); 373 $result = pwg_query($query); 374 while ($row = mysql_fetch_assoc($result)) 375 { // weight is important when sorting images by relevance 376 $id=(int)$row['id']; 377 @$by_weights[$id] += 2; 378 $permissions_checked = false; 379 } 380 } 381 382 // now search tag names corresponding to the query $q. we could have searched 383 // tags earlier during the big join, but for the sake of the performance and 384 // because tags have only a simple name we do it separately 385 $q_like_clause = get_qsearch_like_clause($q, 'CONVERT(name, CHAR)' ); 386 if (!empty($q_like_clause)) 387 { 388 // search also by url name (without accents) 389 $q_like_clause_url = get_qsearch_like_clause($q, 'url_name' ); 432 { // search name and url name (without accents) 390 433 $query = ' 391 434 SELECT id 392 435 FROM '.TAGS_TABLE.' 393 WHERE '.$q_like_clause.'394 OR '.$q_like_clause_url;436 WHERE ('.str_replace($q_like_field, 'CONVERT(name, CHAR)', $q_like_clause).' 437 OR '.str_replace($q_like_field, 'url_name', $q_like_clause).')'; 395 438 $tag_ids = array_from_query( $query, 'id'); 396 439 if (!empty($tag_ids)) 397 { // we got some tags 440 { // we got some tags; get the images 441 $search_results['qs']['matching_tags']=$tag_ids; 398 442 $query = ' 399 SELECT image_id, COUNT(tag_id) AS q443 SELECT image_id, COUNT(tag_id) AS weight 400 444 FROM '.IMAGE_TAG_TABLE.' 401 445 WHERE tag_id IN ('.implode(',',$tag_ids).') … … 405 449 { // weight is important when sorting images by relevance 406 450 $image_id=(int)$row['image_id']; 407 @$by_weights[$image_id] += $row['q']; 408 $permissions_checked = false; 451 @$by_weights[$image_id] += $row['weight']; 409 452 } 410 453 } 411 454 } 412 455 413 //at this point, found images might contain images not allowed for the user 414 if ( !$permissions_checked 415 and !empty($by_weights) 416 and !isset($page['super_order_by']) ) 417 { 418 // before returning the result "as is", make sure the user has the 419 // permissions for every item 420 global $conf; 421 $query = ' 456 457 // Step 3 - search categories corresponding to the query $q ================== 458 global $user; 459 $query = ' 460 SELECT id, nb_images 461 FROM '.CATEGORIES_TABLE.' 462 INNER JOIN '.USER_CACHE_CATEGORIES_TABLE.' ON id=cat_id 463 WHERE user_id='.$user['id'].' 464 AND MATCH(name, comment) AGAINST( "'.$q.'" IN BOOLEAN MODE)'. 465 get_sql_condition_FandF ( 466 array( 'visible_categories' => 'cat_id' ), "\n AND" 467 ); 468 $result = pwg_query($query); 469 while ($row = mysql_fetch_assoc($result)) 470 { // weight is important when sorting images by relevance 471 if ($row['nb_images']==0) 472 { 473 $search_results['qs']['matching_cats_no_images'][] = $row['id']; 474 } 475 else 476 { 477 $search_results['qs']['matching_cats'][] = $row['id']; 478 } 479 } 480 481 if ( empty($by_weights) and empty($search_results['qs']['matching_cats']) ) 482 { 483 return $search_results; 484 } 485 486 // Step 4 - now we have $by_weights ( array image id => weight ) that need 487 // permission checks and/or matching categories to get images from 488 $where_clauses = array(); 489 if ( !empty($by_weights) ) 490 { 491 $where_clauses[]='i.id IN (' 492 . implode(',', array_keys($by_weights)) . ')'; 493 } 494 if ( !empty($search_results['qs']['matching_cats']) ) 495 { 496 $where_clauses[]='category_id IN ('. 497 implode(',',$search_results['qs']['matching_cats']).')'; 498 } 499 $where_clauses = array( '('.implode("\n OR ",$where_clauses).')' ); 500 if (!empty($images_where)) 501 { 502 $where_clauses[]='('.$images_where.')'; 503 } 504 $where_clauses[] = get_sql_condition_FandF( 505 array 506 ( 507 'forbidden_categories' => 'category_id', 508 'visible_categories' => 'category_id', 509 'visible_images' => 'i.id' 510 ), 511 null,true 512 ); 513 514 global $conf; 515 $query = ' 422 516 SELECT DISTINCT(id) 423 FROM '.IMAGES_TABLE.' 517 FROM '.IMAGES_TABLE.' i 424 518 INNER JOIN '.IMAGE_CATEGORY_TABLE.' AS ic ON id = ic.image_id 425 WHERE id IN ('.implode(',', array_keys($by_weights) ).') 426 '.get_sql_condition_FandF 427 ( 428 array 429 ( 430 'forbidden_categories' => 'category_id', 431 'visible_categories' => 'category_id', 432 'visible_images' => 'id' 433 ), 434 'AND' 435 ).' 436 '.$conf['order_by']; 437 $allowed_images = array_flip( array_from_query( $query, 'id') ); 438 $by_weights = array_intersect_key($by_weights, $allowed_images ); 439 $divisor = 4.0 * count($allowed_images); 440 // decrease weight from 0 to 0.25 corresponding to the order 441 foreach ($allowed_images as $id=>$rank ) 442 { 443 $by_weights[$id] -= $rank / $divisor; 444 } 445 $permissions_checked = true; 446 } 447 arsort($by_weights, SORT_NUMERIC); 448 if ( $permissions_checked ) 449 { 450 $search_results['as_is']=1; 451 } 452 453 $search_results['items'] = array_keys($by_weights); 519 WHERE '.implode("\n AND ", $where_clauses)."\n". 520 $conf['order_by']; 521 522 $allowed_images = array_from_query( $query, 'id'); 523 524 if ( isset($page['super_order_by']) or empty($by_weights) ) 525 { 526 $search_results['items'] = $allowed_images; 527 return $search_results; 528 } 529 530 $allowed_images = array_flip( $allowed_images ); 531 $divisor = 5.0 * count($allowed_images); 532 foreach ($allowed_images as $id=>$rank ) 533 { 534 $weight = isset($by_weights[$id]) ? $by_weights[$id] : 1; 535 $weight -= $rank/$divisor; 536 $allowed_images[$id] = $weight; 537 } 538 arsort($allowed_images, SORT_NUMERIC); 539 $search_results['items'] = array_keys($allowed_images); 454 540 return $search_results; 455 541 } … … 459 545 * 460 546 * @param int search id 547 * @param string images_where optional aditional restriction on images table 461 548 * @return array 462 549 */ 463 function get_search_results($search_id )550 function get_search_results($search_id, $images_where='') 464 551 { 465 552 $search = get_search_array($search_id); … … 471 558 else 472 559 { 473 return get_quick_search_results($search['q'] );560 return get_quick_search_results($search['q'], $images_where); 474 561 } 475 562 }
Note: See TracChangeset
for help on using the changeset viewer.