"""Returns first words index id where the field code 'field' is word-indexed.
Returns zero in case there is no words table for this index.
Example: field='author', output=4."""
out = 0
query = """SELECT w.id FROM wordsindex AS w, wordsindex_field AS wf, field AS f
WHERE f.code='%s' AND wf.id_field=f.id AND w.id=wf.id_wordsindex
LIMIT 1""" % MySQLdb.escape_string(field)
res = run_sql(query, None, 1)
if res:
out = res[0][0]
return out
def get_words_from_pattern(pattern):
"Returns list of whitespace-separated words from pattern."
words = {}
for word in split(pattern):
if not words.has_key(word):
            words[word] = 1
return words.keys()
def create_basic_search_units(req, p, f, m=None):
"""Splits search pattern and search field into a list of independently searchable units.
    - Each search unit is an '(operand, pattern, field, type, hitset)' tuple, where
'operand' is set union (|), set intersection (+) or set exclusion (-);
'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
'field' is either a code like 'title' or MARC tag like '100__a';
'type' is the search type ('w' for word file search, 'a' for access file search).
- Optionally, the function accepts the match type argument 'm'.
If it is set (e.g. from advanced search interface), then it
performs this kind of matching. If it is not set, then a guess is made.
'm' can have values: 'a'='all of the words', 'o'='any of the words',
'p'='phrase/substring', 'r'='regular expression',
'e'='exact value'."""
opfts = [] # will hold (o,p,f,t,h) units
## check arguments: if matching type phrase/string/regexp, do we have field defined?
if (m=='p' or m=='r' or m=='e') and not f:
m = 'a'
print_warning(req, "This matching type cannot be used within <em>any field</em>. I will perform a word search instead." )
print_warning(req, "If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose <em>within title</em> search option.")
## is desired matching type set?
if m:
## A - matching type is known; good!
if m == 'e':
# A1 - exact value:
opfts.append(['|',p,f,'a']) # '|' since we have only one unit
elif m == 'p':
# A2 - phrase/substring:
opfts.append(['|',"%"+p+"%",f,'a']) # '|' since we have only one unit
elif m == 'r':
# A3 - regular expression:
opfts.append(['|',p,f,'r']) # '|' since we have only one unit
elif m == 'a' or m == 'w':
# A4 - all of the words:
for word in get_words_from_pattern(p):
if len(opfts)==0:
opfts.append(['|',word,f,'w']) # '|' in the first unit
else:
opfts.append(['+',word,f,'w']) # '+' in further units
elif m == 'o':
# A5 - any of the words:
for word in get_words_from_pattern(p):
opfts.append(['|',word,f,'w']) # '|' in all units
else:
print_warning(req, "Matching type '%s' is not implemented yet." % m, "Warning")
opfts.append(['|',"%"+p+"%",f,'a'])
else:
## B - matching type is not known: let us try to determine it by some heuristics
if f and p[0]=='"' and p[-1]=='"':
            ## B0 - does 'p' start and end with a double quote, and is 'f' defined? => doing ACC search
opfts.append(['|',p[1:-1],f,'a'])
elif f and p[0]=="'" and p[-1]=="'":
            ## B0bis - does 'p' start and end with a single quote, and is 'f' defined? => doing ACC search
opfts.append(['|','%'+p[1:-1]+'%',f,'a'])
elif f and string.find(p, ',') >= 0:
            ## B1 - does 'p' contain a comma, and is 'f' defined? => doing ACC search
opfts.append(['|',p,f,'a'])
elif f and str(f[0:2]).isdigit():
            ## B2 - does 'f' exist and start with two digits? => doing ACC search
opfts.append(['|',p,f,'a'])
else:
## B3 - doing WRD search, but maybe ACC too
# search units are separated by spaces unless the space is within single or double quotes
# so, let us replace temporarily any space within quotes by '__SPACE__'
p = sre.sub("'(.*?)'", lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
p = sre.sub("\"(.*?)\"", lambda x: "\""+string.replace(x.group(1), ' ', '__SPACEBIS__')+"\"", p)
# wash argument:
p = re_equal.sub(":", p)
p = re_logical_and.sub(" ", p)
p = re_logical_or.sub(" |", p)
p = re_logical_not.sub(" -", p)
p = re_operands.sub(r' \1', p)
for pi in split(p): # iterate through separated units (or items, as "pi" stands for "p item")
pi = sre.sub("__SPACE__", " ", pi) # replace back '__SPACE__' by ' '
pi = sre.sub("__SPACEBIS__", " ", pi) # replace back '__SPACEBIS__' by ' '
# firstly, determine set operand
if pi[0] == '+' or pi[0] == '-' or pi[0] == '|':
if len(opfts) or pi[0] == '-': # either not first unit, or '-' for the first unit
oi = pi[0]
else:
oi = "|" # we are in the first unit and operand is not '-', so let us do
# set union (with still null result set)
pi = pi[1:]
else:
# okay, there is no operand, so let us decide what to do by default
if len(opfts):
oi = '+' # by default we are doing set intersection...
else:
oi = "|" # ...unless we are in the first unit
# secondly, determine search pattern and field:
if string.find(pi, ":") > 0:
fi, pi = split(pi, ":", 1)
else:
fi, pi = f, pi
# look also for old ALEPH field names:
if fi and cfg_fields_convert.has_key(string.lower(fi)):
fi = cfg_fields_convert[string.lower(fi)]
# wash 'pi' argument:
if re_quotes.match(pi):
# B3a - quotes are found => do ACC search (phrase search)
if fi:
if re_doublequote.match(pi):
pi = string.replace(pi, '"', '') # get rid of quotes
opfts.append([oi,pi,fi,'a'])
else:
pi = string.replace(pi, "'", '') # get rid of quotes
opfts.append([oi,"%"+pi+"%",fi,'a'])
else:
                        # fi is not defined; check whether we are doing an exact or a subphrase search (double vs. single quotes):
if pi[0]=='"' and pi[-1]=='"':
opfts.append([oi,pi[1:-1],"anyfield",'a'])
print_warning(req, "Searching for an exact match inside any field may be slow. You may want to search for words instead, or choose to search within specific field.")
else:
# nope, subphrase in global index is not possible => change back to WRD search
for pii in get_words_from_pattern(pi):
# since there may be '-' and other chars that we do not index in WRD
opfts.append([oi,pii,fi,'w'])
print_warning(req, "The partial phrase search does not work in any field. I'll do a boolean AND searching instead.")
print_warning(req, "If you want to do a partial phrase search in a specific field, e.g. inside title, then please choose 'within title' search option.", "Tip")
print_warning(req, "If you want to do exact phrase matching, then please use double quotes.", "Tip")
                elif fi and str(fi[0:2]).isdigit():
                    # B3b - fi exists and starts with two digits => do ACC search
opfts.append([oi,pi,fi,'a'])
                elif fi and not get_wordsindex_id(fi):
                    # B3c - fi exists but there is no words table for fi => try ACC search
                    opfts.append([oi,pi,fi,'a'])
                else:
                    # B3d - general case => do WRD search
                    for pii in get_words_from_pattern(pi):
                        opfts.append([oi,pii,fi,'w'])
    ## return the list of search units:
    return opfts

# speed up HitSet operations by ~20% if Psyco is installed:
try:
    import psyco
    psyco.bind(HitSet)
except:
    pass
def escape_string(s):
"Escapes special chars in string. For MySQL queries."
s = MySQLdb.escape_string(s)
return s
def wash_colls(cc, c, split_colls=0):
"""Wash collection list by checking whether user has deselected
anything under 'Narrow search'. Checks also if cc is a list or not.
Return list of cc, colls_to_display, colls_to_search since the list
of collections to display is different from that to search in.
This is because users might have chosen 'split by collection'
functionality.
The behaviour of "collections to display" depends solely whether
user has deselected a particular collection: e.g. if it started
from 'Articles and Preprints' page, and deselected 'Preprints',
then collection to display is 'Articles'. If he did not deselect
anything, then collection to display is 'Articles & Preprints'.
The behaviour of "collections to search in" depends on the
'split_colls' parameter:
* if is equal to 1, then we can wash the colls list down
and search solely in the collection the user started from;
* if is equal to 0, then we are splitting to the first level
of collections, i.e. collections as they appear on the page
we started to search from;
"""
colls_out = []
colls_out_for_display = []
# check what type is 'cc':
if type(cc) is list:
for ci in cc:
if collection_reclist_cache.has_key(ci):
# yes this collection is real, so use it:
cc = ci
break
else:
# check once if cc is real:
if not collection_reclist_cache.has_key(cc):
cc = cdsname # cc is not real, so replace it with Home collection
# check type of 'c' argument:
if type(c) is list:
colls = c
else:
colls = [c]
# remove all 'unreal' collections:
colls_real = []
for coll in colls:
if collection_reclist_cache.has_key(coll):
colls_real.append(coll)
colls = colls_real
# check if some real collections remain:
if len(colls)==0:
colls = [cc]
    # then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'colls':
query = "SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name='%s' AND cc.type='r' AND c.restricted IS NULL" % MySQLdb.escape_string(cc)
res = run_sql(query)
l_cc_nonrestricted_sons = []
l_c = colls
for row in res:
l_cc_nonrestricted_sons.append(row[0])
l_c.sort()
l_cc_nonrestricted_sons.sort()
if l_cc_nonrestricted_sons == l_c:
colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
else:
colls_out_for_display = colls # nope, we need to display all 'colls' successively
# remove duplicates:
colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
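    # The filter above keeps the 1-based positions of the collection names that do not
    # occur again later in the list, i.e. one position per distinct collection; for
    # example ['Articles', 'Preprints', 'Articles'] yields the positions [2, 3].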
"Prints results overview box with links to particular collections below."
out = ""
if len(colls) == 1:
# if one collection only, print nothing:
return out
# first find total number of hits:
out += "<p><table class=\"searchresultsbox\" width=\"100%%\">" \
"<thead><tr><th class=\"searchresultsboxheader\"><strong>Results overview:</strong> Found <strong>%s</strong> records in %.2f seconds.</th></tr></thead>" % \
(nice_number(results_final_nb_total), cpu_time)
# then print hits per collection:
out += "<tbody><tr><td class=\"searchresultsboxbody\">"
for coll in colls:
if results_final_nb.has_key(coll) and results_final_nb[coll] > 0:
out += "<strong><a href=\"#%s\">%s</a></strong>, " \
"""Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by
'sort pattern', for example "sort by report number that starts by CERN-PS".
Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly."""
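    # Illustration (hypothetical field configuration): sort_field='author' would be
    # resolved through the 'field' table to MARC tags such as '100__a', whereas
    # sort_field='100__a' is used as a tag directly; sort_pattern='CERN-PS' then
    # prefers, for each record, the tag value starting with 'CERN-PS'.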
## check arguments:
if not sort_field:
return recIDs
if len(recIDs) > cfg_nb_records_to_sort:
print_warning(req, "Sorry, sorting is allowed on sets of up to %d records only. Using default sort order (\"latest first\")." % cfg_nb_records_to_sort,"Warning")
return recIDs
recIDs_dict = {}
recIDs_out = []
## first deduce sorting MARC tag out of the 'sort_field' argument:
tags = []
if sort_field and str(sort_field[0:2]).isdigit():
        # sort_field starts with two digits, so this is probably a MARC tag already
tags.append(sort_field)
else:
# let us check the 'field' table
query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f
WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag
                   ORDER BY ft.score DESC""" % MySQLdb.escape_string(sort_field)
res = run_sql(query)
if res:
for row in res:
tags.append(row[0])
else:
print_warning(req, "Sorry, '%s' does not seem to be a valid sort option. Choosing title sort instead." % sort_field, "Error")
tags.append("245__a")
## check if we have sorting tag defined:
if tags:
# fetch the necessary field values:
for recID in recIDs:
val = "" # will hold value for recID according to which sort
vals = [] # will hold all values found in sorting tag for recID
for tag in tags:
vals.extend(get_fieldvalues(recID, tag))
if sort_pattern:
# try to pick that tag value that corresponds to sort pattern
bingo = 0
for v in vals:
if v.startswith(sort_pattern): # bingo!
bingo = 1
val = v
break
                if not bingo: # not found, so join them all together
val = string.join(vals)
else:
                # no sort pattern defined, so join them all together:
                val = string.join(vals)
out += "%s (%d)<br>" % (coll, get_collection_reclist(coll)._nbhits)
out += "</blockquote>"
# show search cache:
out += "<h3>Search Cache</h3>"
out += "<blockquote>"
if len(search_cache):
out += """<table border="=">"""
out += "<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Pattern","Field","Collection","Number of Hits")
for search_cache_key in search_cache.keys():
p, f, c = string.split(search_cache_key, "@", 2)
# find out about length of cached data:
l = 0
for coll in search_cache[search_cache_key]:
l += search_cache[search_cache_key][coll]._nbhits
out += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>" % (p, f, c, l)
out += "</table>"
else:
out += "<p>Search cache is empty."
out += "</blockquote>"
out += """<p><a href="%s/search.py/cache?action=clear">clear cache</a>""" % weburl
req.write(out)
return "\n"
def perform_request_log(req, date=""):
"""Display search log information for given date."""
"""Returns first words index id where the field code 'field' is word-indexed.
Returns zero in case there is no words table for this index.
Example: field='author', output=4."""
out = 0
query = """SELECT w.id FROM wordsindex AS w, wordsindex_field AS wf, field AS f
WHERE f.code='%s' AND wf.id_field=f.id AND w.id=wf.id_wordsindex
LIMIT 1""" % MySQLdb.escape_string(field)
res = run_sql(query, None, 1)
if res:
out = res[0][0]
return out
def get_words_from_pattern(pattern):
"Returns list of whitespace-separated words from pattern."
words = {}
for word in split(pattern):
if not words.has_key(word):
words[word] = 1;
return words.keys()
def create_basic_search_units(req, p, f, m=None):
"""Splits search pattern and search field into a list of independently searchable units.
- A search unit consists of '(operand, pattern, field, type, hitset)' tuples where
'operand' is set union (|), set intersection (+) or set exclusion (-);
'pattern' is either a word (e.g. muon*) or a phrase (e.g. 'nuclear physics');
'field' is either a code like 'title' or MARC tag like '100__a';
'type' is the search type ('w' for word file search, 'a' for access file search).
- Optionally, the function accepts the match type argument 'm'.
If it is set (e.g. from advanced search interface), then it
performs this kind of matching. If it is not set, then a guess is made.
'm' can have values: 'a'='all of the words', 'o'='any of the words',
'p'='phrase/substring', 'r'='regular expression',
'e'='exact value'."""
opfts = [] # will hold (o,p,f,t,h) units
## check arguments: if matching type phrase/string/regexp, do we have field defined?
if (m=='p' or m=='r' or m=='e') and not f:
m = 'a'
print_warning(req, "This matching type cannot be used within <em>any field</em>. I will perform a word search instead." )
print_warning(req, "If you want to phrase/substring/regexp search in a specific field, e.g. inside title, then please choose <em>within title</em> search option.")
## is desired matching type set?
if m:
## A - matching type is known; good!
if m == 'e':
# A1 - exact value:
opfts.append(['|',p,f,'a']) # '|' since we have only one unit
elif m == 'p':
# A2 - phrase/substring:
opfts.append(['|',"%"+p+"%",f,'a']) # '|' since we have only one unit
elif m == 'r':
# A3 - regular expression:
opfts.append(['|',p,f,'r']) # '|' since we have only one unit
elif m == 'a' or m == 'w':
# A4 - all of the words:
for word in get_words_from_pattern(p):
if len(opfts)==0:
opfts.append(['|',word,f,'w']) # '|' in the first unit
else:
opfts.append(['+',word,f,'w']) # '+' in further units
elif m == 'o':
# A5 - any of the words:
for word in get_words_from_pattern(p):
opfts.append(['|',word,f,'w']) # '|' in all units
else:
print_warning(req, "Matching type '%s' is not implemented yet." % m, "Warning")
opfts.append(['|',"%"+p+"%",f,'a'])
else:
## B - matching type is not known: let us try to determine it by some heuristics
if f and p[0]=='"' and p[-1]=='"':
## B0 - does 'p' start and end by double quote, and is 'f' defined? => doing ACC search
opfts.append(['|',p[1:-1],f,'a'])
elif f and p[0]=="'" and p[-1]=="'":
## B0bis - does 'p' start and end by single quote, and is 'f' defined? => doing ACC search
opfts.append(['|','%'+p[1:-1]+'%',f,'a'])
elif f and string.find(p, ',') >= 0:
## B1 - does 'p' contain comma, and is 'f' defined? => doing ACC search
opfts.append(['|',p,f,'a'])
elif f and str(f[0:2]).isdigit():
## B2 - does 'f' exist and starts by two digits? => doing ACC search
opfts.append(['|',p,f,'a'])
else:
## B3 - doing WRD search, but maybe ACC too
# search units are separated by spaces unless the space is within single or double quotes
# so, let us replace temporarily any space within quotes by '__SPACE__'
p = sre.sub("'(.*?)'", lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", p)
p = sre.sub("\"(.*?)\"", lambda x: "\""+string.replace(x.group(1), ' ', '__SPACEBIS__')+"\"", p)
# wash argument:
p = re_equal.sub(":", p)
p = re_logical_and.sub(" ", p)
p = re_logical_or.sub(" |", p)
p = re_logical_not.sub(" -", p)
p = re_operands.sub(r' \1', p)
for pi in split(p): # iterate through separated units (or items, as "pi" stands for "p item")
pi = sre.sub("__SPACE__", " ", pi) # replace back '__SPACE__' by ' '
pi = sre.sub("__SPACEBIS__", " ", pi) # replace back '__SPACEBIS__' by ' '
# firstly, determine set operand
if pi[0] == '+' or pi[0] == '-' or pi[0] == '|':
if len(opfts) or pi[0] == '-': # either not first unit, or '-' for the first unit
oi = pi[0]
else:
oi = "|" # we are in the first unit and operand is not '-', so let us do
# set union (with still null result set)
pi = pi[1:]
else:
# okay, there is no operand, so let us decide what to do by default
if len(opfts):
oi = '+' # by default we are doing set intersection...
else:
oi = "|" # ...unless we are in the first unit
# secondly, determine search pattern and field:
if string.find(pi, ":") > 0:
fi, pi = split(pi, ":", 1)
else:
fi, pi = f, pi
# look also for old ALEPH field names:
if fi and cfg_fields_convert.has_key(string.lower(fi)):
fi = cfg_fields_convert[string.lower(fi)]
# wash 'pi' argument:
if re_quotes.match(pi):
# B3a - quotes are found => do ACC search (phrase search)
if fi:
if re_doublequote.match(pi):
pi = string.replace(pi, '"', '') # get rid of quotes
opfts.append([oi,pi,fi,'a'])
else:
pi = string.replace(pi, "'", '') # get rid of quotes
opfts.append([oi,"%"+pi+"%",fi,'a'])
else:
# fi is not defined, look at where we are doing exact or subphrase search (single/double quotes):
if pi[0]=='"' and pi[-1]=='"':
opfts.append([oi,pi[1:-1],"anyfield",'a'])
print_warning(req, "Searching for an exact match inside any field may be slow. You may want to search for words instead, or choose to search within specific field.")
else:
# nope, subphrase in global index is not possible => change back to WRD search
for pii in get_words_from_pattern(pi):
# since there may be '-' and other chars that we do not index in WRD
opfts.append([oi,pii,fi,'w'])
print_warning(req, "The partial phrase search does not work in any field. I'll do a boolean AND searching instead.")
print_warning(req, "If you want to do a partial phrase search in a specific field, e.g. inside title, then please choose 'within title' search option.", "Tip")
print_warning(req, "If you want to do exact phrase matching, then please use double quotes.", "Tip")
elif fi and str(fi[0]).isdigit() and str(fi[0]).isdigit():
# B3b - fi exists and starts by two digits => do ACC search
opfts.append([oi,pi,fi,'a'])
elif fi and not get_wordsindex_id(fi):
# B3c - fi exists but there is no words table for fi => try ACC search
# speed up HitSet operations by ~20% if Psyco is installed:
try:
import psyco
psyco.bind(HitSet)
except:
pass
def escape_string(s):
"Escapes special chars in string. For MySQL queries."
s = MySQLdb.escape_string(s)
return s
def wash_colls(cc, c, split_colls=0):
"""Wash collection list by checking whether user has deselected
anything under 'Narrow search'. Checks also if cc is a list or not.
Return list of cc, colls_to_display, colls_to_search since the list
of collections to display is different from that to search in.
This is because users might have chosen 'split by collection'
functionality.
The behaviour of "collections to display" depends solely whether
user has deselected a particular collection: e.g. if it started
from 'Articles and Preprints' page, and deselected 'Preprints',
then collection to display is 'Articles'. If he did not deselect
anything, then collection to display is 'Articles & Preprints'.
The behaviour of "collections to search in" depends on the
'split_colls' parameter:
* if is equal to 1, then we can wash the colls list down
and search solely in the collection the user started from;
* if is equal to 0, then we are splitting to the first level
of collections, i.e. collections as they appear on the page
we started to search from;
"""
colls_out = []
colls_out_for_display = []
# check what type is 'cc':
if type(cc) is list:
for ci in cc:
if collection_reclist_cache.has_key(ci):
# yes this collection is real, so use it:
cc = ci
break
else:
# check once if cc is real:
if not collection_reclist_cache.has_key(cc):
cc = cdsname # cc is not real, so replace it with Home collection
# check type of 'c' argument:
if type(c) is list:
colls = c
else:
colls = [c]
# remove all 'unreal' collections:
colls_real = []
for coll in colls:
if collection_reclist_cache.has_key(coll):
colls_real.append(coll)
colls = colls_real
# check if some real collections remain:
if len(colls)==0:
colls = [cc]
# then let us check the list of non-restricted "real" sons of 'cc' and compare it to 'coll':
query = "SELECT c.name FROM collection AS c, collection_collection AS cc, collection AS ccc WHERE c.id=cc.id_son AND cc.id_dad=ccc.id AND ccc.name='%s' AND cc.type='r' AND c.restricted IS NULL" % MySQLdb.escape_string(cc)
res = run_sql(query)
l_cc_nonrestricted_sons = []
l_c = colls
for row in res:
l_cc_nonrestricted_sons.append(row[0])
l_c.sort()
l_cc_nonrestricted_sons.sort()
if l_cc_nonrestricted_sons == l_c:
colls_out_for_display = [cc] # yep, washing permitted, it is sufficient to display 'cc'
else:
colls_out_for_display = colls # nope, we need to display all 'colls' successively
# remove duplicates:
colls_out_for_display_nondups=filter(lambda x, colls_out_for_display=colls_out_for_display: colls_out_for_display[x-1] not in colls_out_for_display[x:], range(1, len(colls_out_for_display)+1))
"Prints results overview box with links to particular collections below."
out = ""
if len(colls) == 1:
# if one collection only, print nothing:
return out
# first find total number of hits:
out += "<p><table class=\"searchresultsbox\" width=\"100%%\">" \
"<thead><tr><th class=\"searchresultsboxheader\"><strong>Results overview:</strong> Found <strong>%s</strong> records in %.2f seconds.</th></tr></thead>" % \
(nice_number(results_final_nb_total), cpu_time)
# then print hits per collection:
out += "<tbody><tr><td class=\"searchresultsboxbody\">"
for coll in colls:
if results_final_nb.has_key(coll) and results_final_nb[coll] > 0:
out += "<strong><a href=\"#%s\">%s</a></strong>, " \
"""Sort records in 'recIDs' list according sort field 'sort_field' in order 'sort_order'.
If more than one instance of 'sort_field' is found for a given record, try to choose that that is given by
'sort pattern', for example "sort by report number that starts by CERN-PS".
Note that 'sort_field' can be field code like 'author' or MARC tag like '100__a' directly."""
## check arguments:
if not sort_field:
return recIDs
if len(recIDs) > cfg_nb_records_to_sort:
print_warning(req, "Sorry, sorting is allowed on sets of up to %d records only. Using default sort order (\"latest first\")." % cfg_nb_records_to_sort,"Warning")
return recIDs
recIDs_dict = {}
recIDs_out = []
## first deduce sorting MARC tag out of the 'sort_field' argument:
tags = []
if sort_field and str(sort_field[0:2]).isdigit():
# sort_field starts by two digits, so this is probably a MARC tag already
tags.append(sort_field)
else:
# let us check the 'field' table
query = """SELECT DISTINCT(t.value) FROM tag AS t, field_tag AS ft, field AS f
WHERE f.code='%s' AND ft.id_field=f.id AND t.id=ft.id_tag
ORDER BY ft.score DESC""" % sort_field
res = run_sql(query)
if res:
for row in res:
tags.append(row[0])
else:
print_warning(req, "Sorry, '%s' does not seem to be a valid sort option. Choosing title sort instead." % sort_field, "Error")
tags.append("245__a")
## check if we have sorting tag defined:
if tags:
# fetch the necessary field values:
for recID in recIDs:
val = "" # will hold value for recID according to which sort
vals = [] # will hold all values found in sorting tag for recID
for tag in tags:
vals.extend(get_fieldvalues(recID, tag))
if sort_pattern:
# try to pick that tag value that corresponds to sort pattern
bingo = 0
for v in vals:
if v.startswith(sort_pattern): # bingo!
bingo = 1
val = v
break
if not bingo: # not found, so joint them all together
val = string.join(vals)
else:
# no sort pattern defined, so join them all together
out += "%s (%d)<br>" % (coll, get_collection_reclist(coll)._nbhits)
out += "</blockquote>"
# show search cache:
out += "<h3>Search Cache</h3>"
out += "<blockquote>"
if len(search_cache):
out += """<table border="=">"""
out += "<tr><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td><td><strong>%s</strong></td></tr>" % ("Pattern","Field","Collection","Number of Hits")
for search_cache_key in search_cache.keys():
p, f, c = string.split(search_cache_key, "@", 2)
# find out about length of cached data:
l = 0
for coll in search_cache[search_cache_key]:
l += search_cache[search_cache_key][coll]._nbhits
out += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%d</td></tr>" % (p, f, c, l)
out += "</table>"
else:
out += "<p>Search cache is empty."
out += "</blockquote>"
out += """<p><a href="%s/search.py/cache?action=clear">clear cache</a>""" % weburl
req.write(out)
return "\n"
def perform_request_log(req, date=""):
"""Display search log information for given date."""