"""Splits search pattern and search field into a list of independently searchable units.
- A search unit consists of '(operator, pattern, field, type, hitset)' tuples where
'operator' is set union (|), set intersection (+) or set exclusion (-);
'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
'field' is either a code like 'title' or MARC tag like '100__a';
'type' is the search type ('w' for word file search, 'a' for access file search, 'r' for regular expression search).
- Optionally, the function accepts the match type argument 'm'.
If it is set (e.g. from the advanced search interface), then that
kind of matching is performed. If it is not set, then a guess is made.
'm' can have the values: 'a'='all of the words' ('w' is treated the same way),
'o'='any of the words', 'p'='phrase/substring', 'r'='regular expression',
'e'='exact value'.
- Warnings are printed on req (when not None) in case of HTML output formats."""
opfts=[]# will hold (o,p,f,t,h) units
# FIXME: quick hack for the journal index
iff=='journal':
opfts.append(['+',p,f,'w'])
returnopfts
## check arguments: if matching type phrase/string/regexp, do we have field defined?
if(m=='p'orm=='r'orm=='e')andnotf:
m='a'
ifof.startswith("h"):
print_warning(req,"This matching type cannot be used within <em>any field</em>. I will perform a word search instead.")
print_warning(req,"If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose <em>within title</em> search option.")
## is desired matching type set?
ifm:
## A - matching type is known; good!
ifm=='e':
# A1 - exact value:
opfts.append(['+',p,f,'a'])# '+' since we have only one unit
elifm=='p':
# A2 - phrase/substring:
opfts.append(['+',"%"+p+"%",f,'a'])# '+' since we have only one unit
elifm=='r':
# A3 - regular expression:
opfts.append(['+',p,f,'r'])# '+' since we have only one unit
elifm=='a'orm=='w':
# A4 - all of the words:
p=strip_accents(p)# strip accents for 'w' mode, FIXME: delete when not needed
forwordinget_words_from_pattern(p):
opfts.append(['+',word,f,'w'])# '+' in all units
elifm=='o':
# A5 - any of the words:
p=strip_accents(p)# strip accents for 'w' mode, FIXME: delete when not needed
forwordinget_words_from_pattern(p):
iflen(opfts)==0:
opfts.append(['+',word,f,'w'])# '+' in the first unit
else:
opfts.append(['|',word,f,'w'])# '|' in further units
else:
ifof.startswith("h"):
print_warning(req,"Matching type '%s' is not implemented yet."%m,"Warning")
opfts.append(['+',"%"+p+"%",f,'a'])
else:
## B - matching type is not known: let us try to determine it by some heuristics
iffandp[0]=='"'andp[-1]=='"':
## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search
opfts.append(['+',p[1:-1],f,'a'])
eliffandp[0]=="'"andp[-1]=="'":
## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search
opfts.append(['+','%'+p[1:-1]+'%',f,'a'])
eliffandp[0]=="/"andp[-1]=="/":
## B0ter - does 'p' start and end by a slash, and is 'f' defined? => doing regexp search
opfts.append(['+',p[1:-1],f,'r'])
eliffandstring.find(p,',')>=0:
## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search
opfts.append(['+',p,f,'a'])
eliffandstr(f[0:2]).isdigit():
## B2 - does 'f' exist and starts by two digits? => doing ACC search
opfts.append(['+',p,f,'a'])
else:
## B3 - doing WRD search, but maybe ACC too
# search units are separated by spaces unless the space is within single or double quotes
# so, let us replace temporarily any space within quotes by '__SPACE__'
# B3a - quotes are found => do ACC search (phrase search)
iffi:
ifpi[0]=='"'andpi[-1]=='"':
pi=string.replace(pi,'"','')# remove quote signs
opfts.append([oi,pi,fi,'a'])
elifpi[0]=="'"andpi[-1]=="'":
pi=string.replace(pi,"'","")# remove quote signs
opfts.append([oi,"%"+pi+"%",fi,'a'])
else:# unbalanced quotes, so do WRD query:
opfts.append([oi,pi,fi,'w'])
else:
# fi is not defined, look at whether we are doing exact or subphrase search (single/double quotes):
ifpi[0]=='"'andpi[-1]=='"':
opfts.append([oi,pi[1:-1],"anyfield",'a'])
ifof.startswith("h"):
print_warning(req,"Searching for an exact match inside any field may be slow. You may want to search for words instead, or choose to search within specific field.")
else:
# nope, subphrase in global index is not possible => change back to WRD search
pi=strip_accents(pi)# strip accents for 'w' mode, FIXME: delete when not needed
forpiiinget_words_from_pattern(pi):
# since there may be '-' and other chars that we do not index in WRD
opfts.append([oi,pii,fi,'w'])
ifof.startswith("h"):
print_warning(req,"The partial phrase search does not work in any field. I'll do a boolean AND searching instead.")
print_warning(req,"If you want to do a partial phrase search in a specific field, e.g. inside title, then please choose 'within title' search option.","Tip")
print_warning(req,"If you want to do exact phrase matching, then please use double quotes.","Tip")
"""Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
If more than one instance of 'sort_field' is found for a given record, try to choose the one that matches
'sort_pattern', for example "sort by report number that starts with CERN-PS".
Note that 'sort_field' can be a field code like 'author' or a MARC tag like '100__a' directly;
several such fields may be given, separated by commas."""
_=gettext_set_language(ln)
## check arguments:
ifnotsort_field:
returnrecIDs
iflen(recIDs)>CFG_WEBSEARCH_NB_RECORDS_TO_SORT:
ifof.startswith('h'):
print_warning(req,_("Sorry, sorting is allowed on sets of up to %d records only. Using default sort order.")%CFG_WEBSEARCH_NB_RECORDS_TO_SORT,"Warning")
returnrecIDs
sort_fields=string.split(sort_field,",")
recIDs_dict={}
recIDs_out=[]
## first deduce sorting MARC tag out of the 'sort_field' argument:
tags=[]
forsort_fieldinsort_fields:
ifsort_fieldandstr(sort_field[0:2]).isdigit():
# sort_field starts by two digits, so this is probably a MARC tag already
tags.append(sort_field)
else:
# let us check the 'field' table
query="""SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f
WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag
ORDER BY ft.score DESC"""%sort_field
res=run_sql(query)
ifres:
forrowinres:
tags.append(row[0])
else:
ifof.startswith('h'):
print_warning(req,_("Sorry, %s does not seem to be a valid sort option. Choosing title sort instead.")%sort_field,"Error")
tags.append("245__a")
ifverbose>=3:
print_warning(req,"Sorting by tags %s."%tags)
ifsort_pattern:
print_warning(req,"Sorting preferentially by %s."%sort_pattern)
## check if we have sorting tag defined:
iftags:
# fetch the necessary field values:
forrecIDinrecIDs:
val=""# will hold value for recID according to which sort
vals=[]# will hold all values found in sorting tag for recID
fortagintags:
vals.extend(get_fieldvalues(recID,tag))
ifsort_pattern:
# try to pick that tag value that corresponds to sort pattern
req.write("<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>"%("No.","Time","Pattern","Field","Collection","Number of Hits"))