Changeset 28152


Ignore:
Timestamp:
Apr 10, 2014, 11:23:00 PM (10 years ago)
Author:
rvelices
Message:

bug 3056: quick search - better handling of short words and photo acronyms such as AF-S, EF-S, X-E2, etc.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/functions_search.inc.php

    r28144 r28152  
    604604              break;
    605605            }
     606            if (strlen($crt_token) && isdigit(substr($crt_token,0,-1))
     607              && $qi+1<strlen($q) && isdigit($q[$qi+1]))
     608            {// dot between digits is not a separator e.g. F2.8
     609              $crt_token .= $ch;
     610              break;
     611            }
    606612            // else white space go on..
    607613          default:
    608             if (preg_match('/[\s,.;!\?]+/', $ch))
     614            if (strpos(' ,.;!?', $ch)!==false)
    609615            { // white space
    610616              $this->push($crt_token, $crt_modifier, $crt_scope);
     
    823829  foreach ($variants as $variant)
    824830  {
    825     if (mb_strlen($variant)<=3
    826       || strcspn($variant, '!"#$%&()*+,./:;<=>?@[\]^`{|}~') < 3)
     831    $use_ft = mb_strlen($variant)>3;
     832    if ($token->modifier & QST_WILDCARD_BEGIN)
     833      $use_ft = false;
     834    if (($token->modifier & QST_QUOTED|QST_WILDCARD_END) == QST_QUOTED|QST_WILDCARD_END)
     835      $use_ft = false;
     836    if ($use_ft)
     837    {
     838      $max = max( array_map( 'mb_strlen',
     839        preg_split('/['.preg_quote('!"#$%&()*+,./:;<=>?@[\]^`{|}~','/').']+/', $variant0, PREG_SPLIT_NO_EMPTY)
     840        ) );
     841      if ($max<4)
     842        $use_ft = false;
     843    }
     844
     845    if (!$use_ft)
    827846    {// odd term or too short for full text search; fallback to regex but unfortunately this is diacritic/accent sensitive
    828847      $pre = ($token->modifier & QST_WILDCARD_BEGIN) ? '' : '[[:<:]]';
Note: See TracChangeset for help on using the changeset viewer.