webcoll.in
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Thu, Oct 31, 16:44

webcoll.in
View Options

	## $Id$
	## Script that creates collection pages, starting from the collection
	## passed to the script as an argument.

	## This file is part of the CERN Document Server Software (CDSware).
	## Copyright (C) 2002 CERN.
	##
	## The CDSware is free software; you can redistribute it and/or
	## modify it under the terms of the GNU General Public License as
	## published by the Free Software Foundation; either version 2 of the
	## License, or (at your option) any later version.
	##
	## The CDSware is distributed in the hope that it will be useful, but
	## WITHOUT ANY WARRANTY; without even the implied warranty of
	## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	## General Public License for more details.
	##
	## You should have received a copy of the GNU General Public License
	## along with CDSware; if not, write to the Free Software Foundation, Inc.,
	## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.

	## read config variables:
	#include "config.wml"
	#include "configbis.wml"

	## start Python:
	<protect>#!</protect><PYTHON>
	<protect>## $Id$</protect>
	<protect>## DO NOT EDIT THIS FILE! IT WAS AUTOMATICALLY GENERATED FROM CDSware WML SOURCES.</protect>
	"""Creates CDSware collection specific pages, using WML and MySQL configuration tables."""

	## fill config variables:
	## NOTE(review): the LIBDIR and WML angle-bracket tags below look like placeholders
	## that are filled in when this source is compiled by WML -- confirm in config.wml.
	## pylibdir is appended to sys.path before the cdsware modules are imported.
	pylibdir = "<LIBDIR>/python"
	## wmlexec is not referenced anywhere else in the visible part of this file.
	wmlexec = "<WML> -I<LIBDIR>/wml/cdsware"

	## cfg_update_recinfo is likewise not read anywhere in the visible part of this file.
	cfg_update_recinfo = 0 ## TODO: make this a command-line option

	### okay, rest of the Python code goes below ###

	## import modules:
	try:
	import sys
	import cgi
	import re
	import os
	import math
	import string
	import urllib
	import zlib
	import MySQLdb
	import Numeric
	import time
	except ImportError, e:
	print "Error: %s" % e
	import sys
	sys.exit(1)

	try:
	sys.path.append('%s' % pylibdir)
	from cdsware.config import *
	from cdsware.search_engine import HitList, search_pattern
	from cdsware.search_engine_config import cfg_author_et_al_threshold, cfg_instant_browse, cfg_max_recID
	from cdsware.dbquery import run_sql
	except ImportError, e:
	print "Error: %s" % e
	import sys
	sys.exit(1)

	## auxiliary functions:
	def mymkdir(newdir, mode=0777):
	"""works the way a good mkdir should :)
	- already exists, silently complete
	- regular file in the way, raise an exception
	- parent directory(ies) does not exist, make them as well
	"""
	if os.path.isdir(newdir):
	pass
	elif os.path.isfile(newdir):
	raise OSError("a file with the same name as the desired " \
	"dir, '%s', already exists." % newdir)
	else:
	head, tail = os.path.split(newdir)
	if head and not os.path.isdir(head):
	mymkdir(head, mode)
	if tail:
	os.umask(022)
	os.mkdir(newdir, mode)

	def nice_number(num):
	    """Return 'num' as a string using comma as thousands separator.

	    The characters of str(num) are grouped in threes starting from the
	    right, e.g. 1234567 -> '1,234,567'.  A leading minus sign is kept
	    outside of the grouping, so that -123 gives '-123' and not '-,123'.
	    """
	    digits = str(num)
	    sign = ''
	    if digits[:1] == '-':
	        sign = '-'
	        digits = digits[1:]
	    groups = []
	    # peel off three characters at a time from the right-hand side
	    while len(digits) > 3:
	        groups.insert(0, digits[-3:])
	        digits = digits[:-3]
	    groups.insert(0, digits)
	    return sign + ','.join(groups)

	def escape_string(s):
	    """Return 's' passed through MySQLdb.escape_string(), for use inside MySQL queries."""
	    return MySQLdb.escape_string(s)

	def is_selected(var, fld):
	    """Return ' selected' when 'var' equals 'fld', otherwise the empty string.

	    Handy for marking the current option while building select boxes.
	    """
	    if var != fld:
	        return ""
	    return " selected"

	def Log(msg, type=""):
	    """Write one timestamped message line and flush the stream.

	    The line goes to stderr when 'type' is "Error" and to stdout
	    otherwise.  A non-empty 'type' is appended in parentheses.
	    """
	    stream = sys.stdout
	    if type == "Error":
	        stream = sys.stderr
	    stream.write(time.strftime("%Y-%m-%d %H:%M:%S --> ", time.localtime()))
	    stream.write(str(msg))
	    if type:
	        stream.write(" (" + type + ")")
	    stream.write("\n")
	    stream.flush()

	def create_html_header(name='Search', navtrail='', cdspageboxlefttopadd='', cdspageboxleftbottomadd='',
	                       cdspageboxrighttopadd='', cdspageboxrightbottomadd=''):
	    """Return the 'cdspage.wml' include directive used as standard page header.

	    'name' becomes the page title.  'navtrail' and each non-empty
	    cdspagebox* argument is passed on as an attribute of the same name
	    (navtrail as 'navtrail_body'), wrapped in a WML protect tag.  An
	    empty 'navtrail' still produces an empty navtrail_body attribute;
	    empty box arguments are left out.  The result ends in two newlines.
	    """
	    # NOTE: the protect tag is put together from pieces at run time on
	    # purpose.  This source file is itself processed by WML, which would
	    # otherwise interpret a literally written protect tag right here
	    # instead of letting it reach the generated output.
	    tag = 'protect'
	    out = """#include "cdspage.wml" \\\
	title="%s" \\\
	navbar_name="main" \\\
	navbar_select="searchbiblinfo" """ % name
	    if not navtrail:
	        out += """\\\
	navtrail_body="" """
	    else:
	        out += """\\\
	navtrail_body="<%s>%s</%s>" """ % (tag, navtrail, tag)
	    # the four optional page boxes all follow the same pattern:
	    boxes = (('cdspageboxlefttopadd', cdspageboxlefttopadd),
	             ('cdspageboxleftbottomadd', cdspageboxleftbottomadd),
	             ('cdspageboxrighttopadd', cdspageboxrighttopadd),
	             ('cdspageboxrightbottomadd', cdspageboxrightbottomadd))
	    for argname, argvalue in boxes:
	        if argvalue:
	            out += """\\\
	%s="<%s>%s</%s>" """ % (argname, tag, argvalue, tag)
	    return out + "\n\n"

	def create_andornot_box(name='op', value=''):
	    """Return HTML for the AND / OR / AND NOT select box called 'name'.

	    The option whose code ('a', 'o' or 'n') equals 'value' is preselected.
	    """
	    marks = [is_selected(code, value) for code in ('a', 'o', 'n')]
	    return """
	<select name="%s">
	<option value="a"%s>AND
	<option value="o"%s>OR
	<option value="n"%s>AND NOT
	</select>
	""" % tuple([name] + marks)

	def create_matchtype_box(name='m', value=''):
	    """Return HTML for the 'match type' select box called 'name'.

	    The option whose code ('a', 'o', 'p', 'r' or 'e') equals 'value'
	    is preselected.
	    """
	    marks = [is_selected(code, value) for code in ('a', 'o', 'p', 'r', 'e')]
	    return """
	<select name="%s">
	<option value="a"%s>All of the words:
	<option value="o"%s>Any of the words:
	<option value="p"%s>Phrase/substring:
	<option value="r"%s>Regular expression:
	<option value="e"%s>Exact value:
	</select>
	""" % tuple([name] + marks)

	def get_field(recID, tag):
	    """Return the list of values of field 'tag' for the record numbered 'recID'.

	    The first two characters of 'tag' pick the table pair to query
	    (bibNNx joined with bibrec_bibNNx).
	    """
	    prefix = tag[0:2]
	    value_table = "bib%sx" % prefix
	    link_table = "bibrec_bib%sx" % prefix
	    query = "SELECT bx.value FROM %s AS bx, %s AS bibx WHERE bibx.id_bibrec='%s' AND bx.id=bibx.id_bibxxx AND bx.tag='%s'" \
	            % (value_table, link_table, recID, tag)
	    return [row[0] for row in run_sql(query)]

	def print_record(recID, format='hb'):
	    """Return record 'recID' formatted according to 'format', as an HTML snippet.

	    If the bibfmt table holds a preformatted value for this record and
	    format, it is decompressed and used as is.  Otherwise a default
	    brief rendering is assembled from the record fields: titles
	    (245__a), authors (100__a and 700__a, cut at
	    cfg_author_et_al_threshold), dates (260__c), report numbers (037__a
	    and 088__a), the first sentence of each abstract (520__a) and the
	    fulltext links (8564_u).  A 'Detailed record' link is appended in
	    both cases.
	    """
	    out = ""
	    # HTML brief format by default
	    query = "SELECT value FROM bibfmt WHERE id_bibrec='%s' AND format='%s'" % (recID, format)
	    res = run_sql(query, None, 1)
	    if res:
	        # record 'recID' is formatted in 'format', so print it
	        out += "%s" % zlib.decompress(res[0][0])
	    else:
	        # record 'recID' does not exist in format 'format', so print some default format:
	        # firstly, title:
	        for title in get_field(recID, "245__a"):
	            out += "<strong>%s</strong> " % cgi.escape(title)
	        # secondly, authors:
	        authors = get_field(recID, "100__a") + get_field(recID, "700__a")
	        if authors:
	            out += " / "
	            # max() keeps a negative threshold from slicing from the end
	            for author in authors[:max(cfg_author_et_al_threshold, 0)]:
	                out += """<a href="%s/search.py?p=%s&f=author">%s</a> ;""" % (weburl, urllib.quote(author), cgi.escape(author))
	            if len(authors) > cfg_author_et_al_threshold:
	                out += " <em>et al.</em>"
	        # thirdly, date of creation:
	        for date in get_field(recID, "260__c"):
	            out += " %s." % cgi.escape(date)
	        # thirdly bis, report numbers:
	        for rn in get_field(recID, "037__a") + get_field(recID, "088__a"):
	            out += """ <small class="quicknote">[%s]</small>""" % cgi.escape(rn)
	        # fourthly, beginning of abstract:
	        for abstract in get_field(recID, "520__a"):
	            end = string.find(abstract, '.')
	            if end == -1:
	                # no full stop at all: show the whole abstract instead of nothing
	                beginning = abstract
	            else:
	                beginning = abstract[:end+1]
	            out += "<br><small>%s [...]</small>" % cgi.escape(beginning)
	        # fifthly, fulltext link (escaped like the other fields, quotes included
	        # because the URL also lands inside the href attribute):
	        for url in get_field(recID, "8564_u"):
	            safe_url = cgi.escape(url, 1)
	            out += """<br><small class="note"><a class="note" href="%s">%s</a></small>""" % (safe_url, safe_url)

	    # at the end of HTML mode, print "Detailed record" and "Mark record" functions:
	    out += """<br><span class="moreinfo"><a class="moreinfo" href="%s/search.py?id=%s">Detailed record</a>""" \
	           % (weburl, recID)
	    #out += """<- <input name="mark[]" type="checkbox" value="%s"> Mark record""" % recID
	    out += "</span>"
	    return out

	## class Collection:
	class Collection:
	"Holds the information on collections (id,name,nameshort,dbquery)."

	def __init__(self, name=""):
	"Creates collection instance by querying the MySQL configuration database about 'name'."
	if not name:
	self.name = cdsname # by default we are working on the home page
	self.id = 1
	self.dbquery = None
	self.nbrecs = None
	self.reclist = None
	else:
	self.name = name
	query = "SELECT id,name,nameshort,dbquery,nbrecs,reclist FROM collection WHERE name='%s'" % escape_string(name)
	try:
	res = run_sql(query, None, 1)
	if res:
	self.id = res[0][0]
	self.name = res[0][1]
	self.nameshort = res[0][2]
	self.dbquery = res[0][3]
	self.nbrecs = res[0][4]
	try:
	self.reclist = Numeric.loads(zlib.decompress(res[0][5]))
	except:
	self.reclist = Numeric.zeros(cfg_max_recID+1, Numeric.Int0)
	else: # collection does not exist!
	self.id = None
	self.nameshort = None
	self.dbquery = None
	self.nbrecs = None
	self.reclist = None
	except MySQLdb.Error, e:
	print "Error %d: %s" % (e.args[0], e.args[1])
	sys.exit(1)

	def get_ancestors(self):
	"Returns list of ancestors of the current collection."
	ancestors = []
	id_son = self.id
	while 1:
	query = "SELECT cc.id_dad,c.name FROM collection_collection AS cc, collection AS c "\
	"WHERE cc.id_son=%d AND c.id=cc.id_dad" % int(id_son)
	res = run_sql(query, None, 1)
	if res:
	col_ancestor = Collection(res[0][1])
	ancestors.append(col_ancestor)
	id_son = res[0][0]
	else:
	break
	ancestors.reverse()
	return ancestors

	def restricted_p(self):
	"Predicate to test if the collection is restricted or not."
	query = "SELECT restricted FROM collection WHERE id=%d" % self.id
	res = run_sql(query, None, 1)
	if res and res[0][0] != None:
	return 1
	else:
	return 0

	def get_sons(self, type='r'):
	"Returns list of direct sons of type 'type' for the current collection."
	sons = []
	id_dad = self.id
	query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
	"WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC, c.name ASC" % (int(id_dad), type)
	res = run_sql(query)
	for row in res:
	sons.append(Collection(row[1]))
	return sons

	def get_descendants(self, type='r'):
	"Returns list of all descendants of type 'type' for the current collection."
	descendants = []
	id_dad = self.id
	query = "SELECT cc.id_son,c.name FROM collection_collection AS cc, collection AS c "\
	"WHERE cc.id_dad=%d AND cc.type='%s' AND c.id=cc.id_son ORDER BY score DESC" % (int(id_dad), type)
	res = run_sql(query)
	for row in res:
	col_desc = Collection(row[1])
	descendants.append(col_desc)
	descendants += col_desc.get_descendants()
	return descendants

	def write_cache_file(self, filename='', filebody=''):
	"Write a file inside collection cache."
	# open file:
	dirname = "%s/collections/%d" % (cachedir, self.id)
	mymkdir(dirname)
	fullfilename = dirname + "/%s.html" % filename
	try:
	os.umask(022)
	f = open(fullfilename, "w")
	except IOError, v:
	try:
	(code, message) = v
	except:
	code = 0
	message = v
	print "I/O Error: " + str(message) + " (" + str(code) + ")"
	sys.exit(1)
	# print user info:
	Log("...... creating %s" % fullfilename)
	sys.stdout.flush()
	# print page body:
	f.write(filebody)
	# close file:
	f.close()

	def update_webpage_cache(self):
	"""Create collection page header, navtrail, body (including left and right stripes) and footer, and
	call write_cache_file() afterwards to update the collection webpage cache."""
	Log("%s" % self.name)
	## first, update navtrail:
	for as in range(0,2):
	self.write_cache_file("navtrail-as=%s" % as, self.create_navtrail(as, "","", "", "", ""))
	## second, update page body:
	for as in range(0,2): # do both simple search and advanced search pages:
	body = ""
	body += """<form action="%s/search.py" method="get">""" % weburl
	body += "" + self.create_searchfor(as)
	body += """<table cellspacing="0" cellpadding="0" border="0">"""
	body += """<tr>"""
	body += """<td valign="top">""" + self.create_narrowsearch(as) + "</td>"
	body_focuson = self.create_narrowsearch(as, "Focus on:","v")
	if body_focuson:
	#body += """<td valign="top" width="15" style="border-right: 1px dotted #999999;"> </td>"""
	body += """<td valign="top" width="30"> </td>"""
	body += """<td valign="top">""" + body_focuson + """</td>"""
	body += "</tr></table>"
	body += "</form>"
	self.write_cache_file("body-as=%s" % as, body)
	## third, write portalboxes:
	self.write_cache_file("portalbox-lt", self.create_portalbox("lt"))
	self.write_cache_file("portalbox-lb", self.create_portalbox("lb"))
	self.write_cache_file("portalbox-rt", self.create_portalbox("rt"))
	self.write_cache_file("portalbox-rb", self.create_portalbox("rb"))
	## fourth, write 'last updated' information:
	self.write_cache_file("last-updated", time.strftime("%02d %b %04Y %02H:%02M:%02S %Z", time.localtime()))

	def create_navtrail(self, \
	as=0,
	header="""<table class="navtrailbox" border=0 cellspacing=0 cellpadding=0><tr><td width="15"> </td><td class="navtrailboxbody">""", \
	prolog="", \
	separator=">", \
	epilog="", \
	footer="</td></tr></table>",
	exclude_root=1,
	exclude_self=1):
	"""Creates navigation trail, i.e. links to collection ancestors.
	If as==1, then links to Advanced Search interfaces; otherwise Simple Search.
	"""
	# firstly, display navtrail prologue:
	navtrail = header
	# first, list list of ancestors:
	for dad in self.get_ancestors():
	if dad.name != cdsname:
	navtrail += """%s <a class="navtrail" href="%s/?c=%s&as=%d">%s</a> %s %s """ % \
	(prolog, weburl, urllib.quote_plus(dad.name), as, dad.name, epilog, separator)
	else: # hide cdsname for aesthetical reasons
	if not exclude_root:
	navtrail += """%s <a class="navtrail" href="%s/?as=%d">%s</a> %s %s """ % \
	(prolog, weburl, as, "Home", epilog, separator)
	# then print self:
	if not exclude_self and (self.id > 1 or (self.id==1 and not exclude_root)):
	navtrail += """%s %s %s""" % (prolog, self.name, epilog)
	# last, print navtrail epilogue:
	navtrail += footer
	# sanity check:
	if navtrail == header + prolog + epilog + footer:
	return ""
	return navtrail

	def create_nbrecs_info(self, prolog=""" <small class="nbdoccoll">(""", epilog=""")</small>"""):
	"Return information on the number of records."
	out = ""
	if self.nbrecs:
	out = prolog + nice_number(self.nbrecs) + epilog
	return out

	def create_portalbox(self, position="rt"):
	"""Creates portalboxes of the position 'position' by consulting MySQL configuration database.
	The position may be: 'lt'='left top', 'lb'='left bottom', 'rt'='right top', 'rb'='right bottom',
	'tl'='top left', 'tr'='top right', 'bl'='bottom left', 'br='bottom right'."""
	out = ""
	query = "SELECT p.title,p.body FROM portalbox AS p, collection_portalbox AS cp "\
	" WHERE cp.id_collection=%d AND p.id=cp.id_portalbox AND cp.position='%s' "\
	" ORDER BY cp.score DESC" % (self.id, position)
	res = run_sql(query)
	for row in res:
	out += """<table class="portalbox" width="100%%">
	<thead>
	<tr>
	<th class="portalboxheader">%s</th>
	</tr>
	</thead>
	<tbody>
	<tr>
	<td class="portalboxbody">%s</td>
	</tr>
	</tbody>
	</table>""" % (row[0], row[1])
	return out

	def create_narrowsearch(self, as=0, title="Narrow search:", type="r"):
	"""Creates list of collection descendants of type 'type' under title 'title'.
	If as==1, then links to Advanced Search interfaces; otherwise Simple Search.
	Suitable for 'Narrow search' and 'Focus on' boxes."""
	narrowsearch=""
	# return nothing for type 'v' (virtual collection) if there are no sons:
	if type == 'v' and not self.get_sons(type):
	return ""
	# firstly write silent 'cc' (=current collection) argument:
	if type == 'r': # but not for virtual collections
	narrowsearch += """<input type="hidden" name="cc" value="%s">""" % self.name
	# then get list of sons and analyse it:
	sons = self.get_sons(type)
	# decide upon writing style: if there are grandchildren, then print in bold
	descendants = self.get_descendants(type)
	if len(descendants)>len(sons):
	style_prolog = "<strong>"
	style_epilog = "</strong>"
	else:
	style_prolog = ""
	style_epilog = ""
	# are there some sons?
	if len(sons):
	narrowsearch += """<table class="searchbox">
	<thead>
	<tr>
	<th colspan="2" align="left" class="searchboxheader">
	%s
	</th>
	</tr>
	</thead>
	<tbody>""" % title
	# iterate through sons:
	for son in sons:
	narrowsearch += """<tr><td class="searchboxbody" valign="top">"""
	if type=='r':
	if son.restricted_p():
	narrowsearch += """<input type=checkbox name="c" value="%s"> </td>""" % son.name
	else:
	narrowsearch += """<input type=checkbox name="c" value="%s" checked> </td>""" % son.name
	narrowsearch += """<td valign="top"><a href="%s/?c=%s&as=%d">%s%s%s</a>%s """ % \
	(weburl, urllib.quote_plus(son.name), as, style_prolog, son.name, style_epilog, son.create_nbrecs_info())
	if son.restricted_p():
	narrowsearch += """ <small class="warning">[restricted]</small>"""
	grandsons = son.get_sons()
	nb_grandsons = len(grandsons)
	if nb_grandsons:
	# print all grandsons:
	narrowsearch += """<br>"""
	for i in range(0,nb_grandsons):
	narrowsearch += """<a href="%s/?c=%s&as=%d">%s</a>%s """ % \
	(weburl, urllib.quote_plus(grandsons[i].name), as, grandsons[i].name, \
	grandsons[i].create_nbrecs_info())
	narrowsearch += """</td></tr>"""
	narrowsearch += "</tbody></table>"
	else:
	if type == 'r':
	# no sons, and type 'r', so print info on collection content:
	narrowsearch += """<table class="searchbox">
	<thead>
	<tr>
	<th class="searchboxheader">
	Latest additions:
	</th>
	</tr>
	</thead>
	<tbody>
	<tr>
	<td class="searchboxbody">%s</td>
	</tr>
	<tbody>
	</table>""" % self.create_instant_browse()

	return narrowsearch

	def create_instant_browse(self, rg=cfg_instant_browse):
	"Searches database and produces list of last 'rg' records."
	box = ""
	if self.restricted_p():
	box += """<small><strong>Restricted!</strong></small><br> This collection has restricted content."""
	else:
	url = "%s/search.py?cc=%s&jrec=%d" % (weburl, urllib.quote_plus(self.name), rg+1)
	if self.nbrecs and self.reclist:
	# firstly, get last 'rg' records:
	box += "<ol>"
	recIDs = Numeric.nonzero(self.reclist)
	for idx in range(self.nbrecs-1, self.nbrecs-rg-1, -1):
	if idx>=0:
	box += """<li><p><small>""" + print_record(recIDs[idx]) + "</small></p>"
	box += "</ol>"
	if self.nbrecs > rg:
	box += """<div align="right"><small><a href="%s">[>> more]</a></small></div>""" % url
	else:
	box += """<small><strong>No records! </strong></small><br> This collection does not contain any document yet."""
	return box

	def create_searchoptions(self):
	"Produces 'Search options' portal box."
	# TODO: distinguish name/nameshort, i.e. naviguable/non-naviguable search options
	box=""
	query = """SELECT DISTINCT(cff.id_field),f.code,f.nameshort FROM collection_field_fieldvalue AS cff, field AS f
	WHERE cff.id_collection=%d AND cff.id_fieldvalue IS NOT NULL AND cff.id_field=f.id
	ORDER BY cff.score DESC""" % self.id
	res = run_sql(query)
	if res:
	for row in res:
	field_id = row[0]
	field_code = row[1]
	field_name = row[2]
	query_bis = """SELECT fv.value,fv.nameshort FROM fieldvalue AS fv, collection_field_fieldvalue AS cff
	WHERE cff.id_collection=%d AND cff.type='seo' AND cff.id_field=%d AND fv.id=cff.id_fieldvalue
	ORDER BY cff.score_fieldvalue DESC, cff.score DESC, fv.nameshort ASC""" % (self.id, field_id)
	res_bis = run_sql(query_bis)
	if res_bis:
	box += """<select name="%s">""" % field_code
	box += """<option value="">any %s""" % field_name
	for row_bis in res_bis:
	box += """<option value="%s">%s""" % (cgi.escape(row_bis[0], 1), row_bis[1])
	box += """</select>"""
	return box

	def create_inputdate(self, name="d1"):
	"Produces 'From Date', 'Until Date' kind of selection box. Suitable for search options."
	box = ""
	# day
	box += """<select name="%sd">""" % name
	box += """<option value="">any day"""
	for day in range(1,32):
	box += """<option value="%02d">%02d""" % (day, day)
	box += """</select>"""
	# month
	box += """<select name="%sm">""" % name
	box += """<option value="">any month"""
	for mm, month in [('01','January'), ('02','February'), ('03','March'), ('04','April'), \
	('05','May'), ('06','June'), ('07','July'), ('08','August'), \
	('09','September'), ('10','October'), ('11','November'), ('12','December')]:
	box += """<option value="%s">%s""" % (mm, month)
	box += """</select>"""
	# year
	box += """<select name="%sy">""" % name
	box += """<option value="">any year"""
	for year in range(1980,2004):
	box += """<option value="%d">%d""" % (year, year)
	box += """</select>"""
	return box

	def create_publishedin(self, title="From date", name="d1"):
	"""Produces 'Published in' selection box, if collection name contains the text 'Articles'.
	Suitable for advanced search option."""
	out = ""
	if string.find(self.name, "Article") == -1:
	return out
	# journal title:
	cell_1a = "<small><strong>Published in:</strong></small><br>"
	cell_1a += """<select name="pt">"""
	cell_1a += """<option value="Phys. Rev. A">Phys. Rev. A"""
	cell_1a += """<option value="Phys. Rev. B">Phys. Rev. B"""
	cell_1a += """<option value="Phys. Rev. C">Phys. Rev. C"""
	cell_1a += """<option value="Phys. Rev. D">Phys. Rev. D"""
	cell_1a += """<option value="Phys. Rev. E">Phys. Rev. E"""
	cell_1a += """</select>"""
	# volume:
	cell_1b = "<small><strong>Volume:</strong></small><br>"
	cell_1b += """<input type="text" name="pv" size="4" value="">"""
	# year:
	cell_1c = "<small><strong>Year:</strong></small><br>"
	cell_1c += """<input type="text" name="py" size="4" value="">"""
	# page:
	cell_1d = "<small><strong>Page:</strong></small><br>"
	cell_1d += """<input type="text" name="pp" size="4" value="">"""
	out += """<table cellpadding="3" cellspacing="0">
	<tr>
	<td colspan="3" height="3">
	</td>
	</tr>
	<tr valign="bottom" class="searchbox">
	<td align="left">%s</td>
	<td align="left">%s</td>
	<td align="left">%s</td>
	<td align="left">%s</td>
	</tr>
	</table>""" % \
	(cell_1a, cell_1b, cell_1c, cell_1d)
	return out

	def create_sortoptions(self):
	"Produces 'Sort options' portal box."
	box=""
	query = """SELECT f.code,f.nameshort FROM field AS f, collection_field_fieldvalue AS cff
	WHERE id_collection=%d AND cff.type='soo' AND cff.id_field=f.id
	ORDER BY cff.score DESC, f.name ASC""" % self.id
	box += """<select name="sf">"""
	box += """ <option value="">- latest first -"""
	res = run_sql(query)
	if res:
	for row in res:
	box += """ <option value="%s">%s""" % (row[0], row[1])
	else:
	for field in ('title','author','report number','year'):
	box += """ <option value="%s">%s""" % (field, field)
	box += """</select>"""
	box += """<select name="so">
	<option value="a">asc.
	<option value="d">desc.
	</select>"""
	return box

	def create_displayoptions(self):
	"Produces 'Display options' portal box."
	box=""
	# firstly, display hits per collection selection box:
	box += """<select name="rg">
	<option value=10>10 results
	<option value=25>25 results
	<option value=50>50 results
	<option value=10>100 results
	<option value=25>250 results
	<option value=50>500 results
	</select>"""
	# secondly, display split by collection selection box:
	if self.get_sons():
	box += """<select name="sc">
	<option value="0">all together
	<option value="1">split by collection
	</select>"""
	return box

	def create_formatoptions(self):
	"Produces 'Output format options' portal box."
	box = ""
	box += """<select name="of">
	"""
	query = """SELECT f.code,f.name FROM format AS f, collection_format AS cf
	WHERE cf.id_collection=%d AND cf.id_format=f.id ORDER BY cf.score DESC, f.name ASC""" % self.id
	res = run_sql(query)
	if res:
	# propose found formats:
	for row in res:
	box += """<option value="%s">%s""" % (row[0],row[1])
	else:
	# no formats are defined for this collection, so propose the default HTML one:
	box += """<option value="hb">HTML brief"""
	box += """</select>"""
	return box

	def create_searchwithin_selection_box(self, fieldname='f', value=''):
	"Produces 'search within' selection box for the current collection."
	out = ""
	out += """<select name="%s">""" % fieldname
	out += """<option value="">any field"""
	query = """SELECT f.code,f.name FROM field AS f, collection_field_fieldvalue AS cff
	WHERE cff.type='sew' AND cff.id_collection=%d AND cff.id_field=f.id
	ORDER BY cff.score DESC, f.name ASC""" % self.id
	res = run_sql(query)
	if res:
	# propose found 'search within' fields:
	for row in res:
	out += """<option value="%s"%s>%s""" % (row[0], is_selected(row[0], value), row[1])
	else:
	# no 'search within' fields are defined for this collection, so propose default ones:
	out += """<option value="title"%s>title""" % is_selected("title", value)
	out += """<option value="author"%s>author""" % is_selected("author", value)
	out += """<option value="abstract"%s>abstract""" % is_selected("abstract", value)
	out += """<option value="reportnumber"%s>report number""" % is_selected("reportnumber", value)
	out += """<option value="keyword"%s>keyword""" % is_selected("keyword", value)
	out += """</select>"""
	return out

	def create_searchexample(self):
	"Produces search example(s) for the current collection."
	out = "$collSearchExamples = getSearchExample(%d, $se);" % self.id
	return out

	def create_searchfor(self, as=0):
	"Produces either Simple or Advanced 'Search for' box for the current collection."
	if as == 1:
	return self.create_searchfor_advanced()
	else:
	return self.create_searchfor_simple()

	def create_searchfor_simple(self):
	"Produces simple 'Search for' box for the current collection."
	# print commentary start:
	out = "<!--create_searchfor_simple()-->"
	# define URL add-ons for simple and advanced search boxen:
	if self.name != cdsname:
	ssearchurl = "?c=%s&as=0" % urllib.quote_plus(self.name)
	asearchurl = "?c=%s&as=1" % urllib.quote_plus(self.name)
	else: # hide cdsname for aesthetical reasons
	ssearchurl = "?as=0"
	asearchurl = "?as=1"
	# define search box elements:
	header = """Search %s for:""" % self.create_nbrecs_info(""," records")
	cell_1_left = """<input type="text" name="p" size="40" value="">"""
	cell_1_middle = "%s" % self.create_searchwithin_selection_box()
	cell_1_right = """<input class="formbutton" type="submit" name="search" value="SEARCH"><input class="formbutton" type="submit" name="search" value="Browse">"""
	cell_2 = """<small><a href="%s/help/search/tips.html">search tips</a> :: <a href="%s">advanced search</a></small>""" \
	% (weburl, asearchurl)
	# merge them:
	out += """<table class="searchbox">
	<thead>
	<tr align="left">
	<th colspan="3" class="searchboxheader">%s</th>
	</tr>
	</thead>
	<tbody>
	<tr valign="baseline">
	<td class="searchboxbody" align="left">%s</td>
	<td class="searchboxbody" align="left">%s</td>
	<td class="searchboxbody" align="left">%s</td>
	</tr>
	<tr valign="baseline">
	<td class="searchboxbody" colspan="3" align="right">%s</td>
	</tr>
	</tbody>
	</table>""" % \
	(header,
	cell_1_left, cell_1_middle, cell_1_right,
	cell_2)
	# print commentary end:
	out += "<!--/create_searchfor_simple()-->"
	return out

	def create_searchfor_advanced(self):
	"Produces simple 'Search for' box for the current collection."
	# print commentary start:
	out = "<!--create_searchfor_advanced()-->"
	out += """<input type="hidden" name="as" value="1">"""
	# define URL add-ons for simple and advanced search boxen:
	if self.name != cdsname:
	ssearchurl = "?c=%s&as=0" % urllib.quote_plus(self.name)
	asearchurl = "?c=%s&as=1" % urllib.quote_plus(self.name)
	else: # hide cdsname for aesthetical reasons
	ssearchurl = "?as=0"
	asearchurl = "?as=1"
	# define search box elements:
	header = """Search %s for:""" % self.create_nbrecs_info("", " records")
	cell_1_left = create_matchtype_box('m1') + """<input type="text" name="p1" size="40" value="">"""
	cell_1_middle = self.create_searchwithin_selection_box('f1')
	cell_1_right = create_andornot_box('op1')
	cell_2_left = create_matchtype_box('m2') + """<input type="text" name="p2" size="40" value="">"""
	cell_2_middle = self.create_searchwithin_selection_box('f2')
	cell_2_right = create_andornot_box('op2')
	cell_3_left = create_matchtype_box('m3') + """<input type="text" name="p3" size="40" value="">"""
	cell_3_middle = self.create_searchwithin_selection_box('f3')
	cell_3_right = """<input class="formbutton" type="submit" name="search" value="SEARCH"><input class="formbutton" type="submit" name="search" value="Browse"> """
	cell_4 = """<small><a href="%s/help/search/tips.html">search tips</a> :: <a href="%s">simple search</a></small>""" \
	% (weburl, ssearchurl)
	# merge them:
	out += """<table class="searchbox">
	<thead>
	<tr>
	<th class="searchboxheader" colspan="3">%s</th>
	</tr>
	</thead>
	<tbody>
	<tr valign="bottom">
	<td nowrap>%s</td>
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody">%s</td>
	</tr>
	<tr valign="bottom">
	<td class="searchboxbody" nowrap>%s</td>
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody">%s</td>
	</tr>
	<tr valign="bottom">
	<td class="searchboxbody" nowrap>%s</td>
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody" nowrap>%s</td>
	</tr>
	<tr valign="bottom">
	<td colspan="3" class="searchboxbody" align="right">%s</td>
	</tr>
	</tbody>
	</table>""" % \
	(header,
	cell_1_left, cell_1_middle, cell_1_right, \
	cell_2_left, cell_2_middle, cell_2_right, \
	cell_3_left, cell_3_middle, cell_3_right, \
	cell_4)
	# create more search options, if any:
	if self.create_searchoptions():
	out += """<table class="searchbox">
	<thead>
	<tr>
	<th class="searchboxheader">
	Search options:
	</th>
	</tr>
	</thead>
	<tbody>
	<tr valign="bottom">
	<td class="searchboxbody">%s</td>
	</tr>
	<tbody>
	</table>""" % self.create_searchoptions()
	if 0:
	# TODO: create published in, if appropriate:
	if self.create_publishedin():
	out += self.create_publishedin()
	# arrival date: from/until:
	cell_6_a = self.create_inputdate("d1")
	cell_6_b = self.create_inputdate("d2")
	out += """<table class="searchbox">
	<thead>
	<tr>
	<th class="searchboxheader">
	Added since:
	</th>
	<th class="searchboxheader">
	until:
	</th>
	</tr>
	</thead>
	<tbody>
	<tr valign="bottom">
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody">%s</td>
	</tr>
	</tbody>
	</table>""" % \
	(cell_6_a, cell_6_b)
	# format options, etc in the last table:
	cell_7_a = self.create_sortoptions()
	cell_7_b = self.create_displayoptions()
	cell_7_c = self.create_formatoptions()
	out += """<table class="searchbox">
	<thead>
	<tr>
	<th class="searchboxheader">
	Sort results by:
	</th>
	<th class="searchboxheader">
	Display results:
	</th>
	<th class="searchboxheader">
	Output format:
	</th>
	</tr>
	</thead>
	<tbody>
	<tr valign="bottom">
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody">%s</td>
	<td class="searchboxbody">%s</td>
	</tr>
	</tbody>
	</table>""" % \
	(cell_7_a, cell_7_b, cell_7_c)
	# print commentary end:
	out += "<!--/create_searchfor_advanced()-->"
	return out

	def calculate_reclist(self):
	"Calculates the reclist universe for given collection."
	reclist = HitList()
	if not self.dbquery:
	# A - collection does not have dbquery, so query recursively all its non-restricted sons:
	for coll in self.get_sons():
	if not coll.restricted_p():
	reclist.union(coll.calculate_reclist())
	else:
	# B - collection does have dbquery, so compute and return number of documents found:
	reclist = search_pattern(None,self.dbquery)
	reclist.calculate_nbhits()
	self.nbrecs = reclist._nbhits
	self.reclist = reclist._set
	return reclist

	def update_reclist_in_db(self):
	"Update the record universe for given collection; nbrecs, reclist of the collection table."
	Log("%s" % self.name)
	sys.stdout.flush()
	try:
	query = "UPDATE collection SET nbrecs=%d, reclist='%s' WHERE id=%d" % \
	(self.nbrecs, escape_string(zlib.compress(Numeric.dumps(self.reclist))), self.id)
	res = run_sql(query)
	except MySQLdb.Error, e:
	print "Database Query Error %d: %s." % (e.args[0], e.args[1])
	sys.exit(1)
	Log("...... updated %d records" % self.nbrecs)

	def usage(code, msg=''):
	    """Prints usage info to stderr and exits with status 'code'.

	    If 'msg' is given, it is printed first as 'Error: <msg>.' -- the
	    full stop is added here, so 'msg' should not end with one.
	    """
	    prog = sys.argv[0]
	    write = sys.stderr.write
	    if msg:
	        write("Error: %s.\n" % msg)
	    write("Usage: %s <action> [collection][+]\n" % prog)
	    write("""Description: %s updates the collection cache
	(record universe for a given collection plus web page elements)
	based on WML and MySQL configuration parameters.
	If the collection name is passed as the second argument, it'll update
	this collection only. If the collection name is immediately followed
	by a plus sign, it will also update all its descendants. The
	top-level collection name may be entered as the void string.\n""" % prog)
	    for example in ('update-reclist',
	                    'update-webpage',
	                    'update-webpage "Articles & Preprints"',
	                    'update-webpage "Articles & Preprints"+',
	                    'update-webpage ""',
	                    'update-reclist ""+'):
	        write("Example: %s %s\n" % (prog, example))
	    sys.exit(code)

	def main():
	    """Main function to call creation of collection pages. Arguments described in usage() function.

	    sys.argv[1] is the action ('update-reclist' or 'update-webpage').
	    The optional sys.argv[2] names the collection to start from; a
	    trailing plus sign includes its descendants.  Without it, all
	    collections are treated.
	    """
	    ## 1 - firstly, try to read arguments:
	    if len(sys.argv) < 2 or len(sys.argv) > 3:
	        usage(0)
	    action = sys.argv[1]
	    if action != "update-reclist" and action != "update-webpage":
	        # no trailing full stop here: usage() appends one itself
	        usage(1, "Unknown action '%s'" % action)

	    ## 2 - secondly, construct collection list to treat:
	    colls = []
	    if len(sys.argv) == 3:
	        # 2a - we start from certain collection only, so construct partial collection list:
	        coll_sons_too = 0 # we don't treat collection descendants by default
	        coll_name = sys.argv[2]
	        if coll_name[-1:] == '+': # do we do also descendants?
	            coll_name = coll_name[:-1]
	            coll_sons_too = 1
	        # only now add the page in question:
	        coll = Collection(coll_name)
	        if coll.id == None:
	            usage(1, 'Collection %s does not exist' % coll_name)
	        colls.append(coll)
	        if coll_sons_too:
	            colls.extend(coll.get_descendants())
	    else:
	        # 2b - do all collections:
	        for row in run_sql("""SELECT name FROM collection ORDER BY id"""):
	            colls.append(Collection(row[0]))

	    ## 3 - thirdly, perform action (it was validated in step 1 already):
	    if action == "update-reclist":
	        for coll in colls:
	            coll.calculate_reclist()
	            coll.update_reclist_in_db()
	    else:
	        for coll in colls:
	            coll.update_webpage_cache()

	### okay, here we go:
	if __name__ == '__main__':
	    main()

webcoll.inNo OneTemporaryActions

File Metadata

webcoll.inView Options

Event Timeline

webcoll.in
No OneTemporary
Actions

webcoll.in
View Options