diff --git a/modules/miscutil/lib/dateutils.py b/modules/miscutil/lib/dateutils.py
index b227c270e..b945240f1 100644
--- a/modules/miscutil/lib/dateutils.py
+++ b/modules/miscutil/lib/dateutils.py
@@ -1,293 +1,293 @@
# -*- coding: utf-8 -*-
##
## Some functions about dates
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
API for date conversion and date related GUI creation.
Lexicon
datetext:
textual format => 'YEAR-MONTH-DAY HOUR:MINUTE:SECOND'
e.g. '2005-11-16 15:11:44'
default value: '0000-00-00 00:00:00'
datestruct:
tuple format => see http://docs.python.org/lib/module-time.html
(YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, WEEKDAY, YEARDAY, DAYLIGHT)
e.g. (2005, 11, 16, 15, 11, 44, 2, 320, 0)
default value: (0, 0, 0, 0, 0, 0, 0, 0, 0)
dategui:
textual format for output => 'DAY MONTH YEAR, HOUR:MINUTE'
e.g. '16 nov 2005, 15:11'
default value: _("N/A")
"""
__revision__ = "$Id$"
from time import strptime, strftime, localtime
from invenio.config import CFG_SITE_LANG
from invenio.messages import gettext_set_language
datetext_default = '0000-00-00 00:00:00'
datestruct_default = (0, 0, 0, 0, 0, 0, 0, 0, 0)
datetext_format = "%Y-%m-%d %H:%M:%S"
def convert_datetext_to_dategui(datetext, ln=CFG_SITE_LANG, secs=False):
    """
    Render a datetext as a human readable, internationalized date.

    '2005-11-16 15:11:57' => '16 nov 2005, 15:11'
    or, when secs is true:
    '2005-11-16 15:11:57' => '16 nov 2005, 15:11:57'

    The month label is translated into language ln. Whenever the
    conversion fails (unparsable text, default "zero" date, ...) the
    translated "N/A" label is returned instead of raising.
    """
    try:
        parsed = convert_datetext_to_datestruct(datetext)
        if parsed == datestruct_default:
            # Unparsable or empty date: handled by the N/A branch below.
            raise ValueError
        month_label = get_i18n_month_name(parsed[1], ln=ln)
        clock_format = "%H:%M"
        if secs:
            clock_format = "%H:%M:%S"
        return strftime("%d " + month_label + " %Y, " + clock_format, parsed)
    except:
        _ = gettext_set_language(ln)
        return _("N/A")
def convert_datetext_to_datestruct(datetext):
    """
    Parse a datetext into a datestruct.

    '2005-11-16 15:11:57' => (2005, 11, 16, 15, 11, 57, 2, 320, -1)

    Returns datestruct_default when the text cannot be parsed.
    """
    try:
        parsed = strptime(datetext, datetext_format)
    except:
        parsed = datestruct_default
    return parsed
def convert_datestruct_to_dategui(datestruct, ln=CFG_SITE_LANG):
    """
    Render a datestruct as a human readable, internationalized date.

    (2005, 11, 16, 15, 11, 44, 2, 320, 0) => '16 nov 2005, 15:11'

    The month label is translated into language ln. A datestruct whose
    year, month or day is zero/empty, or any other failure, yields the
    translated "N/A" label.
    """
    try:
        if not (datestruct[0] and datestruct[1] and datestruct[2]):
            # Year, month and day must all be set to build a date.
            raise ValueError
        month_label = get_i18n_month_name(datestruct[1], ln=ln)
        return strftime("%d " + month_label + " %Y, %H:%M", datestruct)
    except:
        _ = gettext_set_language(ln)
        return _("N/A")
def convert_datestruct_to_datetext(datestruct):
    """
    Format a datestruct as a datetext.

    (2005, 11, 16, 15, 11, 44, 2, 320, 0) => '2005-11-16 15:11:44'

    Returns datetext_default when the value cannot be formatted.
    """
    try:
        formatted = strftime(datetext_format, datestruct)
    except:
        formatted = datetext_default
    return formatted
def convert_datecvs_to_datestruct(datecvs):
    """
    Convert a CVS Date keyword or a CVS Id keyword string into a
    datestruct. Useful for later conversion of "Last updated"
    timestamps in the page footers.

    Example: a Date keyword carrying '2006/09/20 19:27:11'
             => (2006, 9, 20, 19, 27, 11, ...)

    Returns datestruct_default when the text does not have the
    expected layout.
    """
    try:
        if not datecvs.startswith("$Id"):
            # The format is assembled as '$' + 'Date...' on purpose:
            # written in one piece, a CVS commit would expand the keyword
            # and overwrite the time format with the commit date.
            return strptime(datecvs, '$' + 'Date: %Y/%m/%d %H:%M:%S $')
        # In an Id keyword the date and the time are the 4th and 5th
        # space-separated fields.
        fields = datecvs.split(" ")
        return strptime(' '.join(fields[3:5]), '%Y/%m/%d %H:%M:%S')
    except ValueError:
        return datestruct_default
def get_datetext(year, month, day):
    """
    Build the datetext of a given day, at midnight.

    year=2005, month=11, day=16 => '2005-11-16 00:00:00'

    Returns datetext_default when the values do not form a valid date.
    """
    try:
        day_text = "%i-%i-%i" % (year, month, day)
        return strftime(datetext_format, strptime(day_text, "%Y-%m-%d"))
    except:
        return datetext_default
def get_datestruct(year, month, day):
    """
    Build the datestruct of a given day, at midnight.

    year=2005, month=11, day=16 => (2005, 11, 16, 0, 0, 0, 2, 320, -1)

    Returns datestruct_default when the values do not form a valid date
    (ValueError from strptime) or are not numbers (TypeError from the
    '%i' formatting).
    """
    input_format = "%Y-%m-%d"
    try:
        return strptime("%i-%i-%i" % (year, month, day), input_format)
    # A tuple is required here: "ValueError or TypeError" evaluates to
    # just ValueError, which would let TypeError escape.
    except (ValueError, TypeError):
        return datestruct_default
def get_i18n_day_name(day_nb, display='short', ln=CFG_SITE_LANG):
    """
    Return the internationalized name of a weekday.
    @param day_nb: number of weekday UNIX like.
                   => 0=Sunday
    @param display: 'short' for abbreviated names ("Sun"), any other
                    value for full names ("Sunday")
    @param ln: language for output
    @return: the string representation of the day
    """
    _ = gettext_set_language(ln)
    if display != 'short':
        return {0: _("Sunday"),
                1: _("Monday"),
                2: _("Tuesday"),
                3: _("Wednesday"),
                4: _("Thursday"),
                5: _("Friday"),
                6: _("Saturday")}[day_nb]
    return {0: _("Sun"),
            1: _("Mon"),
            2: _("Tue"),
            3: _("Wed"),
            4: _("Thu"),
            5: _("Fri"),
            6: _("Sat")}[day_nb]
def get_i18n_month_name(month_nb, display='short', ln=CFG_SITE_LANG):
"""
Get a non-numeric representation of a month, internationalized.
@param month_nb: number of month, (1 based!)
=>1=jan,..,12=dec; 0 gives the generic label "Month"
@param display: 'short' for abbreviated names ("Jan"); any other value
gives the full names ("January")
@param ln: language for output
@return: the string representation of month
@raise KeyError: if month_nb is not one of 0..12
"""
_ = gettext_set_language(ln)
if display == 'short':
- monthes = {0: _("Month"),
+ months = {0: _("Month"),
1: _("Jan"),
2: _("Feb"),
3: _("Mar"),
4: _("Apr"),
5: _("May"),
6: _("Jun"),
7: _("Jul"),
8: _("Aug"),
9: _("Sep"),
10: _("Oct"),
11: _("Nov"),
12: _("Dec")}
else:
- monthes = {0: _("Month"),
+ months = {0: _("Month"),
1: _("January"),
2: _("February"),
3: _("March"),
4: _("April"),
5: _("May"),
6: _("June"),
7: _("July"),
8: _("August"),
9: _("September"),
10: _("October"),
11: _("November"),
12: _("December")}
- return monthes[month_nb]
+ return months[month_nb]
def create_day_selectbox(name, selected_day=0, ln=CFG_SITE_LANG):
"""
Creates an HTML menu for day selection. (0..31 values).
@param name: name of the control (i.e. name of the var you'll get)
@param selected_day: preselect a day. Use 0 for the label 'Day'
@param ln: language of the menu
@return: html as a string
"""
_ = gettext_set_language(ln)
# NOTE(review): as visible here the function returns a bare newline and
# never uses name, selected_day or _; the statements building the menu
# markup appear to be missing -- confirm against the original file.
out = "\n"
return out
def create_month_selectbox(name, selected_month=0, ln=CFG_SITE_LANG):
"""
Creates an HTML menu for month selection. Value of selected field is numeric
@param name: name of the control (your form will be sent with name=value...)
@param selected_month: preselect a month. use 0 for the Label 'Month'
@param ln: language of the menu
@return: html as string
"""
# NOTE(review): as visible here the function returns a bare newline and
# ignores all of its parameters; the statements building the menu markup
# appear to be missing -- confirm against the original file.
out = "\n"
return out
def create_year_inputbox(name, value=0):
"""
Creates an HTML field (simple input) for year selection.
@param name: name of the control (i.e. name of the variable you'll get)
@param value: prefilled value (int)
@return: html as string
"""
# NOTE(review): the format string below contains no conversion specifier,
# so applying "% (name, value)" raises TypeError ("not all arguments
# converted") as written; the input-field markup presumably got lost --
# confirm against the original file.
out = "\n"% (name, value)
return out
def create_year_selectbox(name, from_year=-1, length=10, selected_year=0, ln=CFG_SITE_LANG):
"""
Creates an HTML menu (dropdownbox) for year selection.
@param name: name of control( i.e. name of the variable you'll get)
@param from_year: year on which to begin. if <0 assume it is current year
@param length: number of items in menu
@param selected_year: initial selected year (if in range), else: label is selected
@param ln: language
@return: html as string
"""
_ = gettext_set_language(ln)
if from_year < 0:
# A negative start year means "start from the current local year".
from_year = localtime()[0]
# NOTE(review): as visible here the function returns a bare newline and
# never uses from_year, length or selected_year afterwards; the statements
# building the menu markup appear to be missing -- confirm against the
# original file.
out = "\n"
return out
diff --git a/modules/webcomment/lib/webcomment.py b/modules/webcomment/lib/webcomment.py
index b8a7fdc16..a2b9cce31 100644
--- a/modules/webcomment/lib/webcomment.py
+++ b/modules/webcomment/lib/webcomment.py
@@ -1,1766 +1,1766 @@
# -*- coding: utf-8 -*-
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
""" Comments and reviews for records """
__revision__ = "$Id$"
# non CDS Invenio imports:
import time
import math
import os
from datetime import datetime, timedelta
# CDS Invenio imports:
from invenio.dbquery import run_sql
from invenio.config import CFG_PREFIX, \
CFG_SITE_LANG, \
CFG_WEBALERT_ALERT_ENGINE_EMAIL,\
CFG_SITE_ADMIN_EMAIL,\
CFG_SITE_SUPPORT_EMAIL,\
CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,\
CFG_SITE_URL,\
CFG_SITE_NAME,\
CFG_WEBCOMMENT_ALLOW_REVIEWS,\
CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS,\
CFG_WEBCOMMENT_ALLOW_COMMENTS,\
CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL,\
CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN,\
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS,\
CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_REVIEWS_IN_SECONDS,\
CFG_WEBCOMMENT_DEFAULT_MODERATOR
from invenio.webmessage_mailutils import \
email_quote_txt, \
email_quoted_txt2html
from invenio.webuser import get_user_info, get_email, collect_user_info
from invenio.dateutils import convert_datetext_to_dategui, \
datetext_default, \
convert_datestruct_to_datetext
from invenio.mailutils import send_email
from invenio.messages import wash_language, gettext_set_language
from invenio.urlutils import wash_url_argument
from invenio.webcomment_config import CFG_WEBCOMMENT_ACTION_CODE, \
CFG_WEBCOMMENT_EMAIL_REPLIES_TO, \
CFG_WEBCOMMENT_ROUND_DATAFIELD, \
CFG_WEBCOMMENT_RESTRICTION_DATAFIELD, \
CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH
from invenio.access_control_engine import acc_authorize_action
from invenio.access_control_admin import acc_is_role
from invenio.access_control_config import CFG_WEBACCESS_WARNING_MSGS
from invenio.search_engine import \
guess_primary_collection_of_a_record, \
check_user_can_view_record, \
get_fieldvalues, \
get_collection_reclist, \
get_colID
from invenio.webcomment_washer import EmailWasher
# Load the webcomment templates. Any failure is silently ignored, in which
# case webcomment_templates stays undefined and the functions below that use
# it will raise NameError when called.
try:
import invenio.template
webcomment_templates = invenio.template.load('webcomment')
except:
pass
def perform_request_display_comments_or_remarks(req, recID, display_order='od', display_since='all', nb_per_page=100, page=1, ln=CFG_SITE_LANG, voted=-1, reported=-1, subscribed=0, reviews=0, uid=-1, can_send_comments=False, can_attach_files=False, user_is_subscribed_to_discussion=False, user_can_unsubscribe_from_discussion=False, display_comment_rounds=None):
"""
Returns all the comments (reviews) of a specific internal record or external basket record.
@param req: request object, handed over unchanged to the template
@param recID: record id where (internal record IDs > 0) or (external basket record IDs < -100)
@param display_order: hh = highest helpful score, review only
lh = lowest helpful score, review only
hs = highest star score, review only
ls = lowest star score, review only
od = oldest date
nd = newest date
@param display_since: all= no filtering by date
nd = n days ago
nw = n weeks ago
nm = n months ago
ny = n years ago
where n is a single digit integer between 0 and 9
@param nb_per_page: number of results per page
@param page: results page
@param ln: language of the generated page
@param voted: boolean, active if user voted for a review, see perform_request_vote function
@param reported: boolean, active if user reported a certain comment/review, perform_request_report function
@param subscribed: int, 1 if user just subscribed to discussion, -1 if unsubscribed
@param reviews: boolean, enabled if reviews, disabled for comments
@param uid: the id of the user who is reading comments
@param can_send_comments: if user can send comment or not
@param can_attach_files: if user can attach file to comment or not
@param user_is_subscribed_to_discussion: True if user already receives new comments by email
@param user_can_unsubscribe_from_discussion: True if user is allowed to unsubscribe from discussion
@param display_comment_rounds: list of names of the comment rounds to display.
The special names 'all' and 'latest' are expanded below; a list
containing 'latest' (and not 'all') is modified in place.
@return: tuple (html body, errors, warnings)
"""
errors = []
warnings = []
nb_reviews = 0
nb_comments = 0
# wash arguments
recID = wash_url_argument(recID, 'int')
ln = wash_language(ln)
display_order = wash_url_argument(display_order, 'str')
display_since = wash_url_argument(display_since, 'str')
nb_per_page = wash_url_argument(nb_per_page, 'int')
page = wash_url_argument(page, 'int')
voted = wash_url_argument(voted, 'int')
reported = wash_url_argument(reported, 'int')
reviews = wash_url_argument(reviews, 'int')
# vital argument check
(valid, error_body) = check_recID_is_in_range(recID, warnings, ln)
if not(valid):
return (error_body, errors, warnings)
# Query the database and filter results.
# Both kinds are fetched: res holds the requested kind (it is the one being
# displayed), res2 the other kind, which is only counted.
# NOTE(review): display_order is used by these queries before it is
# validated further down.
user_info = collect_user_info(uid)
res = query_retrieve_comments_or_remarks(recID, display_order, display_since, reviews, user_info=user_info)
res2 = query_retrieve_comments_or_remarks(recID, display_order, display_since, not reviews, user_info=user_info)
nb_res = len(res)
if reviews:
nb_reviews = nb_res
nb_comments = len(res2)
else:
nb_reviews = len(res2)
nb_comments = nb_res
# checking non vital arguments - will be set to default if wrong
#if page <= 0 or page.lower() != 'all':
# NOTE(review): only negative values are reset here; page == 0 is let
# through -- confirm that this is intended.
if page < 0:
page = 1
warnings.append(('WRN_WEBCOMMENT_INVALID_PAGE_NB',))
if nb_per_page < 0:
nb_per_page = 100
warnings.append(('WRN_WEBCOMMENT_INVALID_NB_RESULTS_PER_PAGE',))
if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews:
if display_order not in ['od', 'nd', 'hh', 'lh', 'hs', 'ls']:
display_order = 'hh'
warnings.append(('WRN_WEBCOMMENT_INVALID_REVIEW_DISPLAY_ORDER',))
else:
if display_order not in ['od', 'nd']:
display_order = 'od'
warnings.append(('WRN_WEBCOMMENT_INVALID_DISPLAY_ORDER',))
if not display_comment_rounds:
display_comment_rounds = []
# filter results according to page and number of results per page
# (nb_per_page == 0 disables paging: everything is on a single page)
if nb_per_page > 0:
if nb_res > 0:
last_page = int(math.ceil(nb_res / float(nb_per_page)))
else:
last_page = 1
if page > last_page:
page = 1
warnings.append(("WRN_WEBCOMMENT_INVALID_PAGE_NB",))
if nb_res > nb_per_page: # if more than one page of results
if page < last_page:
res = res[(page-1)*(nb_per_page) : (page*nb_per_page)]
else:
res = res[(page-1)*(nb_per_page) : ]
else: # one page of results
pass
else:
last_page = 1
# Send to template
avg_score = 0.0
if not CFG_WEBCOMMENT_ALLOW_COMMENTS and not CFG_WEBCOMMENT_ALLOW_REVIEWS: # comments not allowed by admin
errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))
# Translate the outcome of a previous report/vote/subscription action
# into a warning shown to the user.
if reported > 0:
warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',))
elif reported == 0:
warnings.append(('WRN_WEBCOMMENT_ALREADY_REPORTED',))
elif reported == -2:
warnings.append(('WRN_WEBCOMMENT_INVALID_REPORT',))
if CFG_WEBCOMMENT_ALLOW_REVIEWS and reviews:
# The average is computed on res, i.e. after the paging slice above.
avg_score = calculate_avg_score(res)
if voted > 0:
warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED',))
elif voted == 0:
warnings.append(('WRN_WEBCOMMENT_ALREADY_VOTED',))
if subscribed == 1:
warnings.append(('WRN_WEBCOMMENT_SUBSCRIBED',))
elif subscribed == -1:
warnings.append(('WRN_WEBCOMMENT_UNSUBSCRIBED',))
grouped_comments = group_comments_by_round(res, reviews)
# Clean list of comments round names:
# 'all' is replaced by the name of every round found, 'latest' by the name
# of the last group (when there is one).
if not display_comment_rounds:
display_comment_rounds = []
elif 'all' in display_comment_rounds:
display_comment_rounds = [cmtgrp[0] for cmtgrp in grouped_comments]
elif 'latest' in display_comment_rounds:
if grouped_comments:
display_comment_rounds.append(grouped_comments[-1][0])
display_comment_rounds.remove('latest')
body = webcomment_templates.tmpl_get_comments(req,
recID,
ln,
nb_per_page, page, last_page,
display_order, display_since,
CFG_WEBCOMMENT_ALLOW_REVIEWS,
grouped_comments, nb_comments, avg_score,
warnings,
border=0,
reviews=reviews,
total_nb_reviews=nb_reviews,
uid=uid,
can_send_comments=can_send_comments,
can_attach_files=can_attach_files,
user_is_subscribed_to_discussion=\
user_is_subscribed_to_discussion,
user_can_unsubscribe_from_discussion=\
user_can_unsubscribe_from_discussion,
display_comment_rounds=display_comment_rounds)
return (body, errors, warnings)
def perform_request_vote(cmt_id, client_ip_address, value, uid=-1):
    """
    Record a positive or negative vote for a comment/review.
    The vote is logged in the action history and the vote counters of
    the comment are updated.
    @param cmt_id: review id
    @param client_ip_address: IP address of the voter
    @param value: +1 for voting positively
                  -1 for voting negatively
    @param uid: user id of the voter
    @return: result of query_record_useful_review() when the vote is
             accepted, integer 0 when it is refused
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    value = wash_url_argument(value, 'int')
    uid = wash_url_argument(uid, 'int')
    vote_accepted = cmt_id > 0 and value in [-1, 1] and \
                    check_user_can_vote(cmt_id, client_ip_address, uid)
    if not vote_accepted:
        return 0
    # Log the vote first, then update the counters of the comment.
    query = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT,
id_bibrec, id_user, client_host, action_time,
action_code)
VALUES (%s, NULL ,%s, inet_aton(%s), %s, %s)"""
    params = (cmt_id,
              uid,
              client_ip_address,
              convert_datestruct_to_datetext(time.localtime()),
              CFG_WEBCOMMENT_ACTION_CODE['VOTE'])
    run_sql(query, params)
    return query_record_useful_review(cmt_id, value)
def check_user_can_comment(recID, client_ip_address, uid=-1):
    """ Check that the user has not already commented on the record
    within the last CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS
    seconds. The user is identified by uid when uid >= 0, by client IP
    address otherwise.
    @param recID: record id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True if no recent comment was found
    """
    recID = wash_url_argument(recID, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    # Oldest action time that still counts as "recent".
    cutoff = time.localtime(time.time() - CFG_WEBCOMMENT_TIMELIMIT_PROCESSING_COMMENTS_IN_SECONDS)
    cutoff = convert_datestruct_to_datetext(cutoff)
    query = """SELECT id_bibrec
FROM cmtACTIONHISTORY
WHERE id_bibrec=%s AND
action_code=%s AND
action_time>%s
"""
    params = (recID, CFG_WEBCOMMENT_ACTION_CODE['ADD_COMMENT'], cutoff)
    if uid >= 0:
        query += " AND id_user=%s"
        params += (uid,)
    else:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    return len(run_sql(query, params)) == 0
def check_user_can_review(recID, client_ip_address, uid=-1):
    """ Check that the user has not already reviewed the record.
    Looks for any ADD_REVIEW entry of the action history for this
    record; the user is identified by uid when uid >= 0, by client IP
    address otherwise. No time limit is applied by the query.
    @param recID: record ID
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True if no review by this user was found
    """
    query = """SELECT id_bibrec
FROM cmtACTIONHISTORY
WHERE id_bibrec=%s AND
action_code=%s
"""
    params = (recID, CFG_WEBCOMMENT_ACTION_CODE['ADD_REVIEW'])
    if uid >= 0:
        query += " AND id_user=%s"
        params += (uid,)
    else:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    return len(run_sql(query, params)) == 0
def check_user_can_vote(cmt_id, client_ip_address, uid=-1):
    """ Check that the user has no entry yet in the action history for
    the given comment. The user is identified by uid when uid >= 0, by
    client IP address otherwise.
    @param cmt_id: comment id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True if no entry was found
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    query = """SELECT id_cmtRECORDCOMMENT
FROM cmtACTIONHISTORY
WHERE id_cmtRECORDCOMMENT=%s"""
    params = (cmt_id,)
    if uid >= 0:
        query += " AND id_user=%s"
        params += (uid, )
    else:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    return len(run_sql(query, params)) == 0
def get_comment_collection(cmt_id):
    """
    Return the primary collection of the record on which the given
    comment was written.
    @param cmt_id: comment id
    """
    rows = run_sql("SELECT id_bibrec FROM cmtRECORDCOMMENT WHERE id=%s", (cmt_id,))
    return guess_primary_collection_of_a_record(rows[0][0])
def get_collection_moderators(collection):
    """
    Return the list of email addresses authorized for the
    'moderatecomments' action on the given collection, or a list with
    only CFG_WEBCOMMENT_DEFAULT_MODERATOR when there is none.
    """
    from invenio.access_control_engine import acc_get_authorized_emails
    moderators = list(acc_get_authorized_emails('moderatecomments', collection=collection))
    if moderators:
        return moderators
    return [CFG_WEBCOMMENT_DEFAULT_MODERATOR,]
def perform_request_report(cmt_id, client_ip_address, uid=-1):
"""
Report a comment/review for inappropriate content.
Will send an email to the moderators of the comment's collection if the
number of reports is a multiple of CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN
@param cmt_id: comment id
@param client_ip_address: IP address of the reporting user
@param uid: user id of the reporting user
@return: integer 1 if successful, integer 0 if not. -2 if comment does not exist
"""
cmt_id = wash_url_argument(cmt_id, 'int')
if cmt_id <= 0:
return 0
# NOTE(review): the report counter of the comment is incremented here,
# before check_user_can_report() is consulted below -- a refused (duplicate)
# report has therefore already been counted. Confirm this is intended.
(query_res, nb_abuse_reports) = query_record_report_this(cmt_id)
if query_res == 0:
return 0
elif query_res == -2:
return -2
if not(check_user_can_report(cmt_id, client_ip_address, uid)):
return 0
# Log the report in the action history.
action_date = convert_datestruct_to_datetext(time.localtime())
action_code = CFG_WEBCOMMENT_ACTION_CODE['REPORT_ABUSE']
query = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT, id_bibrec,
id_user, client_host, action_time, action_code)
VALUES (%s, NULL, %s, inet_aton(%s), %s, %s)"""
params = (cmt_id, uid, client_ip_address, action_date, action_code)
run_sql(query, params)
# Notify the moderators every Nth report only.
if nb_abuse_reports % CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN == 0:
# From here on id_user is the author of the reported comment, not the
# reporting user (uid).
(cmt_id2,
id_bibrec,
id_user,
cmt_body,
cmt_date,
cmt_star,
cmt_vote, cmt_nb_votes_total,
cmt_title,
cmt_reported,
round_name,
restriction) = query_get_comment(cmt_id)
(user_nb_abuse_reports,
user_votes,
user_nb_votes_total) = query_get_user_reports_and_votes(int(id_user))
# NOTE(review): query_get_user_contact_info() returns () when the user is
# not found, in which case this unpacking raises ValueError.
(nickname, user_email, last_login) = query_get_user_contact_info(id_user)
from_addr = '%s Alert Engine <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL)
comment_collection = get_comment_collection(cmt_id)
to_addrs = get_collection_moderators(comment_collection)
subject = "A comment has been reported as inappropriate by a user"
# The 'cfg-report_max', 'user_votes' and 'user_admin_link' entries of the
# dictionary below are not referenced by the message template.
body = '''
The following comment has been reported a total of %(cmt_reported)s times.
Author: nickname = %(nickname)s
email = %(user_email)s
user_id = %(uid)s
This user has:
total number of reports = %(user_nb_abuse_reports)s
%(votes)s
Comment: comment_id = %(cmt_id)s
record_id = %(id_bibrec)s
date written = %(cmt_date)s
nb reports = %(cmt_reported)s
%(review_stuff)s
body =
---start body---
%(cmt_body)s
---end body---
Please go to the record page %(comment_admin_link)s to delete this message if necessary. A warning will be sent to the user in question.''' % \
{ 'cfg-report_max' : CFG_WEBCOMMENT_NB_REPORTS_BEFORE_SEND_EMAIL_TO_ADMIN,
'nickname' : nickname,
'user_email' : user_email,
'uid' : id_user,
'user_nb_abuse_reports' : user_nb_abuse_reports,
'user_votes' : user_votes,
'votes' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \
"total number of positive votes\t= %s\n\t\ttotal number of negative votes\t= %s" % \
(user_votes, (user_nb_votes_total - user_votes)) or "\n",
'cmt_id' : cmt_id,
'id_bibrec' : id_bibrec,
'cmt_date' : cmt_date,
'cmt_reported' : cmt_reported,
'review_stuff' : CFG_WEBCOMMENT_ALLOW_REVIEWS and \
"star score\t= %s\n\treview title\t= %s" % (cmt_star, cmt_title) or "",
'cmt_body' : cmt_body,
'comment_admin_link' : CFG_SITE_URL + "/record/" + str(id_bibrec) + '/comments#' + str(cmt_id),
'user_admin_link' : "user_admin_link" #! FIXME
}
#FIXME to be added to email when websession module is over:
#If you wish to ban the user, you can do so via the User Admin Panel %(user_admin_link)s.
send_email(from_addr, to_addrs, subject, body)
return 1
def check_user_can_report(cmt_id, client_ip_address, uid=-1):
    """ Checks if a user hasn't already reported a comment.
    The user is identified by uid when uid >= 0, by client IP address
    otherwise.
    @param cmt_id: comment id
    @param client_ip_address: IP => use: str(req.remote_ip)
    @param uid: user id, as given by invenio.webuser.getUid(req)
    @return: True if the action history holds no entry of this user for
             the comment
    """
    cmt_id = wash_url_argument(cmt_id, 'int')
    client_ip_address = wash_url_argument(client_ip_address, 'str')
    uid = wash_url_argument(uid, 'int')
    # NOTE(review): the query is not restricted to the REPORT_ABUSE action
    # code, so any logged action of the user on this comment counts --
    # confirm this is intended.
    query = """SELECT id_cmtRECORDCOMMENT
FROM cmtACTIONHISTORY
WHERE id_cmtRECORDCOMMENT=%s"""
    # The first placeholder is the comment id (it used to be bound to uid,
    # which made the lookup match an unrelated comment).
    params = (cmt_id,)
    if uid < 0:
        query += " AND client_host=inet_aton(%s)"
        params += (client_ip_address,)
    else:
        query += " AND id_user=%s"
        params += (uid,)
    res = run_sql(query, params)
    return (len(res) == 0)
def query_get_user_contact_info(uid):
    """
    Get the contact information of a user.
    @param uid: user id
    @return: tuple (nickname, email, last_login), if none found return ()
    """
    rows = run_sql("""SELECT nickname, email,
DATE_FORMAT(last_login, '%%Y-%%m-%%d %%H:%%i:%%s')
FROM user WHERE id=%s""", (uid,))
    if not rows:
        return ()
    return rows[0]
def query_get_user_reports_and_votes(uid):
    """
    Sum up the abuse reports and the votes over all comments written by
    a particular user.
    @param uid: user id
    @return: tuple (total_nb_reports, total_nb_votes_yes, total_nb_votes_total)
             if none found return ()
    """
    rows = run_sql("""SELECT nb_votes_yes,
nb_votes_total,
nb_abuse_reports
FROM cmtRECORDCOMMENT
WHERE id_user=%s""", (uid,))
    if len(rows) == 0:
        return ()
    # Running totals, in the order of the selected columns:
    # [votes_yes, votes_total, abuse_reports]
    totals = [0, 0, 0]
    for row in rows:
        for column in range(3):
            totals[column] += int(row[column])
    return (totals[2], totals[0], totals[1])
def query_get_comment(comID):
    """
    Fetch all the fields of a comment.
    @param comID: comment id
    @return: tuple (comID, id_bibrec, id_user, body, date_creation, star_score, nb_votes_yes, nb_votes_total, title, nb_abuse_reports, round_name, restriction)
             if none found return ()
    """
    rows = run_sql("""SELECT id,
id_bibrec,
id_user,
body,
DATE_FORMAT(date_creation, '%%Y-%%m-%%d %%H:%%i:%%s'),
star_score,
nb_votes_yes,
nb_votes_total,
title,
nb_abuse_reports,
round_name,
restriction
FROM cmtRECORDCOMMENT
WHERE id=%s""", (comID,))
    if len(rows) > 0:
        return rows[0]
    return ()
def query_record_report_this(comID):
    """
    Increment the number of abuse reports of a comment.
    @param comID: comment id
    @return: tuple (status, new_total_nb_reports_for_this_comment) where
             status is the integer result of the UPDATE (1 if success,
             0 if not), or -2 if the comment does not exist
    """
    # Read the current counter
    rows = run_sql("SELECT nb_abuse_reports FROM cmtRECORDCOMMENT WHERE id=%s", (comID,))
    if len(rows) == 0:
        return (-2, 0)
    # Store the incremented counter
    new_total = int(rows[0][0]) + 1
    status = run_sql("UPDATE cmtRECORDCOMMENT SET nb_abuse_reports=%s WHERE id=%s",
                     (new_total, comID))
    return (int(status), new_total)
def query_record_useful_review(comID, value):
    """
    Private function
    Update the vote counters of a comment: the total number of votes is
    always incremented, the number of useful ("yes") votes only for a
    positive vote.
    @param comID: comment id
    @param value: +1 or -1
    @return: integer result of the UPDATE (1 if successful), integer 0
             if the comment does not exist
    """
    # Read the current counters
    rows = run_sql("SELECT nb_votes_total, nb_votes_yes FROM cmtRECORDCOMMENT WHERE id=%s", (comID,))
    if len(rows) == 0:
        return 0
    counters = rows[0]
    # Compute and store the new counters
    yes_votes = int(counters[1])
    if value >= 1:
        yes_votes += 1
    total_votes = int(counters[0]) + 1
    status = run_sql("UPDATE cmtRECORDCOMMENT SET nb_votes_total=%s, nb_votes_yes=%s WHERE id=%s",
                     (total_votes, yes_votes, comID))
    return int(status)
def query_retrieve_comments_or_remarks(recID, display_order='od', display_since='0000-00-00 00:00:00',
                                       ranking=0, limit='all', user_info=None):
    """
    Private function
    Retrieve the comments or reviews of a record from the database,
    leaving out those that the given user is not allowed to view.
    @param recID: record id
    @param display_order: hh = highest helpful score
                          lh = lowest helpful score
                          hs = highest star score
                          ls = lowest star score
                          od = oldest date
                          nd = newest date
    @param display_since: passed through calculate_start_date() to obtain
                          the oldest creation date to retrieve
    @param ranking: boolean, enabled if reviews, disabled for comments
    @param limit: maximum number of comments/reviews to return: an integer,
                  a string of digits, or any other value (e.g. 'all') for
                  no limit
    @param user_info: user information used to check the restriction of
                      each comment; when empty no check is done
    @return: list of rows, or () when there is none. A row is
             (nickname, uid, date_creation, body, status, nb_abuse_reports,
              id, round_name, restriction, reply_to) if ranking disabled or
             (nickname, uid, date_creation, body, status, nb_abuse_reports,
              nb_votes_yes, nb_votes_total, star_score, title,
              id, round_name, restriction, reply_to)
    """
    display_since = calculate_start_date(display_since)
    order_dict = { 'hh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) DESC, cmt.date_creation DESC ",
                   'lh' : "cmt.nb_votes_yes/(cmt.nb_votes_total+1) ASC, cmt.date_creation ASC ",
                   'ls' : "cmt.star_score ASC, cmt.date_creation DESC ",
                   'hs' : "cmt.star_score DESC, cmt.date_creation DESC ",
                   'nd' : "cmt.reply_order_cached_data DESC ",
                   'od' : "cmt.reply_order_cached_data ASC "
                   }
    # Ranking only done for reviews of records with a positive id
    if ranking and recID > 0:
        try:
            display_order = order_dict[display_order]
        except (KeyError, TypeError):
            display_order = order_dict['od']
    else:
        # plain comments, or recID <= 0 => no ranking: only the date based
        # orders ('od', 'nd') are accepted
        ranking = 0
        try:
            if display_order[-1] == 'd':
                display_order = order_dict[display_order]
            else:
                display_order = order_dict['od']
        except (KeyError, IndexError, TypeError):
            display_order = order_dict['od']

    # Pieces of the query that depend on the kind of rows requested
    if ranking:
        ranking_columns = ' cmt.nb_votes_yes, cmt.nb_votes_total, cmt.star_score, cmt.title, '
        ranking_filter = ' AND cmt.star_score>0 '
        restriction_index = 12
    else:
        ranking_columns = ''
        ranking_filter = ' AND cmt.star_score=0 '
        restriction_index = 8
    params = (recID,)
    if display_since == '0000-00-00 00:00:00':
        since_filter = ' '
    else:
        # The date is bound as a query parameter instead of being spliced
        # into the SQL text.
        since_filter = 'AND cmt.date_creation>=%s '
        params += (display_since,)
    if recID > 0:
        reply_to_column = 'cmt.in_reply_to_id_cmtRECORDCOMMENT'
    else:
        reply_to_column = 'cmt.in_reply_to_id_bskRECORDCOMMENT'

    # The template is formatted twice (here, then by run_sql with params),
    # hence the doubled/quadrupled percent signs.
    query = """SELECT user.nickname,
cmt.id_user,
DATE_FORMAT(cmt.date_creation, '%%%%Y-%%%%m-%%%%d %%%%H:%%%%i:%%%%s'),
cmt.body,
cmt.status,
cmt.nb_abuse_reports,
%(ranking)s cmt.id,
cmt.round_name,
cmt.restriction,
%(reply_to_column)s
FROM cmtRECORDCOMMENT cmt LEFT JOIN user ON
user.id=cmt.id_user
WHERE cmt.id_bibrec=%%s
%(ranking_only)s
%(display_since)s
ORDER BY %(display_order)s
""" % {'ranking' : ranking_columns,
       'ranking_only' : ranking_filter,
       'display_since' : since_filter,
       'display_order': display_order,
       'reply_to_column': reply_to_column}
    res = run_sql(query, params)

    # Normalise the limit: None stands for "no limit"
    try:
        max_results = int(limit)
    except (TypeError, ValueError):
        max_results = None
    if max_results is not None and max_results < 0:
        max_results = None

    comments_list = []
    for row in res:
        if max_results is not None and len(comments_list) >= max_results:
            break
        # Take the restriction column by position; it may legitimately be
        # empty, so it must not be chosen with an "and/or" expression.
        restriction = row[restriction_index]
        if user_info and check_user_can_view_comment(user_info, None, restriction)[0] != 0:
            # User cannot view comment. Look further
            continue
        comments_list.append(row)
    if comments_list:
        return comments_list
    return ()
## def get_comment_children(comID):
## """
## Returns the list of children (i.e. direct descendants) ordered by time of addition.
## @param comID: the ID of the comment for which we want to retrieve children
## @type comID: int
## @return the list of children
## @rtype: list
## """
## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,))
## return [row[0] for row in res]
## def get_comment_descendants(comID, depth=None):
## """
## Returns the list of descendants of the given comment, ordered from
## oldest to newest ("top-down"), down to depth specified as parameter.
## @param comID: the ID of the comment for which we want to retrieve descendant
## @type comID: int
## @param depth: the max depth down to which we want to retrieve
## descendants. Specify None for no limit, 1 for direct
## children only, etc.
## @return the list of descendants
## @rtype: list(tuple(comment ID, descendants comments IDs))
## """
## if depth == 0:
## return (comID, [])
## res = run_sql("SELECT id FROM cmtRECORDCOMMENT WHERE in_reply_to_id_cmtRECORDCOMMENT=%s", (comID,))
## if res:
## children_comID = [row[0] for row in res]
## children_descendants = []
## if depth:
## depth -= 1
## children_descendants = [get_comment_descendants(child_comID, depth) for child_comID in children_comID]
## return (comID, children_descendants)
## else:
## return (comID, [])
def get_comment_ancestors(comID, depth=None):
    """
    Climb the reply chain of a comment and collect its ancestors.

    The result is ordered "top-down": the oldest ancestor comes first
    and the direct parent of comID is the last element.

    @param comID: the ID of the comment whose ancestors are wanted
    @type comID: int
    @param depth: how many levels to climb. None means no limit, 1
                  returns the direct parent only, 0 returns nothing.
    @type depth: int
    @return: the IDs of the ancestors, oldest first
    @rtype: list
    """
    lineage = []
    current_id = comID
    levels_left = depth
    while levels_left != 0:
        rows = run_sql("SELECT in_reply_to_id_cmtRECORDCOMMENT FROM cmtRECORDCOMMENT WHERE id=%s", (current_id,))
        if not rows:
            # Unknown comment: nothing more to climb
            break
        parent_id = rows[0][0]
        if parent_id == 0:
            # A parent ID of 0 means the comment is not a reply
            break
        lineage.append(parent_id)
        current_id = parent_id
        if levels_left:
            # None (no limit) is left untouched
            levels_left -= 1
    # Parents were gathered bottom-up; flip to oldest-first
    lineage.reverse()
    return lineage
def get_reply_order_cache_data(comid):
    """
    Encode the given comment ID as a string of four characters, one
    per byte and most significant byte first, so that the value is
    suitable for byte ordering in MySQL.
    """
    return "".join([chr((comid >> offset) % 256) for offset in (24, 16, 8, 0)])
def query_add_comment_or_remark(reviews=0, recID=0, uid=-1, msg="",
                                note="", score=0, priority=0,
                                client_ip_address='', editor_type='textarea',
                                req=None, reply_to=None, attached_files=None):
    """
    Private function
    Insert a comment/review or remark into the database, log the
    action in cmtACTIONHISTORY and notify the subscribers of the
    discussion by email.
    @param reviews: true value when a review is added, false for a comment
    @param recID: record id
    @param uid: user id
    @param msg: comment body
    @param note: comment title
    @param score: review star score
    @param priority: remark priority #!FIXME (not used by this function)
    @param client_ip_address: IP address of the submitter, stored in the action history
    @param editor_type: the kind of editor used to submit the comment: 'textarea', 'fckeditor'
    @param req: request object. If provided, email notification are sent after we reply to user request.
    @param reply_to: the id of the comment we are replying to with this inserted comment.
    @param attached_files: mapping of filename to filepath of the files to attach to the new comment
    @return: integer >0 representing id if successful, integer 0 if not
    """
    current_date = calculate_start_date('0d')
    #change utf-8 message into general unicode
    msg = msg.decode('utf-8')
    note = note.decode('utf-8')
    #change general unicode back to utf-8
    msg = msg.encode('utf-8')
    note = note.encode('utf-8')
    (restriction, round_name) = get_record_status(recID)
    if attached_files is None:
        attached_files = {}
    if reply_to and CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH >= 0:
        # Check that we have not reached max depth
        comment_ancestors = get_comment_ancestors(reply_to)
        if len(comment_ancestors) >= CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH:
            if CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH == 0:
                reply_to = None
            else:
                # Too deep: attach the reply to the deepest allowed ancestor
                reply_to = comment_ancestors[CFG_WEBCOMMENT_MAX_COMMENT_THREAD_DEPTH - 1]
        # Inherit restriction and group/round of 'parent'
        comment = query_get_comment(reply_to)
        if comment:
            (round_name, restriction) = comment[10:12]
    if editor_type == 'fckeditor':
        # Here we remove the line feeds introduced by FCKeditor (they
        # have no meaning for the user) and replace the HTML line
        # breaks by linefeeds, so that we are close to an input that
        # would be done without the FCKeditor. That's much better if a
        # reply to a comment is made with a browser that does not
        # support FCKeditor.
        msg = msg.replace('\n', '').replace('\r', '').replace('<br />', '\n')
    query = """INSERT INTO cmtRECORDCOMMENT (id_bibrec,
id_user,
body,
date_creation,
star_score,
nb_votes_total,
title,
round_name,
restriction,
in_reply_to_id_cmtRECORDCOMMENT)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    params = (recID, uid, msg, current_date, score, 0, note, round_name, restriction, reply_to or 0)
    res = run_sql(query, params)
    if not res:
        # Nothing was inserted: report failure as documented
        return 0

    new_comid = int(res)
    move_attached_files_to_storage(attached_files, recID, new_comid)
    # The ordering key of a reply is the key of its parent followed by
    # the encoded ID of the reply itself.
    parent_reply_order = run_sql("""SELECT reply_order_cached_data from cmtRECORDCOMMENT where id=%s""", (reply_to,))
    if not parent_reply_order or parent_reply_order[0][0] is None:
        # This is not a reply, but a first 0-level comment
        parent_reply_order = ''
    else:
        parent_reply_order = parent_reply_order[0][0]
    run_sql("""UPDATE cmtRECORDCOMMENT SET reply_order_cached_data=%s WHERE id=%s""",
            (parent_reply_order + get_reply_order_cache_data(new_comid), new_comid))
    if reviews:
        action_code = CFG_WEBCOMMENT_ACTION_CODE['ADD_REVIEW']
    else:
        action_code = CFG_WEBCOMMENT_ACTION_CODE['ADD_COMMENT']
    action_time = convert_datestruct_to_datetext(time.localtime())
    query2 = """INSERT INTO cmtACTIONHISTORY (id_cmtRECORDCOMMENT,
id_bibrec, id_user, client_host, action_time, action_code)
VALUES (%s, %s, %s, inet_aton(%s), %s, %s)"""
    params2 = (res, recID, uid, client_ip_address, action_time, action_code)
    run_sql(query2, params2)

    def notify_subscribers_callback(data):
        """
        Define a callback that retrieves subscribed users, and
        notify them by email.
        @param data: contains the necessary parameters in a tuple:
                     (recid, uid, comid, msg, note, score, editor_type, reviews)
        """
        recid, uid, comid, msg, note, score, editor_type, reviews = data
        # Email this comment to 'subscribers'
        (subscribers_emails1, subscribers_emails2) = \
                              get_users_subscribed_to_discussion(recid)
        email_subscribers_about_new_comment(recid, reviews=reviews,
                                            emails1=subscribers_emails1,
                                            emails2=subscribers_emails2,
                                            comID=comid, msg=msg,
                                            note=note, score=score,
                                            editor_type=editor_type, uid=uid)

    # Register our callback to notify subscribed people after
    # having replied to our current user.
    data = (recID, uid, res, msg, note, score, editor_type, reviews)
    if req:
        req.register_cleanup(notify_subscribers_callback, data)
    else:
        notify_subscribers_callback(data)
    return int(res)
def move_attached_files_to_storage(attached_files, recID, comid):
    """
    Relocate the files just attached to a new comment to their final
    place: <CFG_PREFIX>/var/data/comments/<recID>/<comid>/<filename>.
    @param attached_files: the mappings of desired filename to attach
                           and path where to find the original file
    @type attached_files: dict {filename, filepath}
    @param recID: the record ID to which we attach the files
    @param comid: the comment ID to which we attach the files
    """
    for target_name, source_path in attached_files.items():
        destination = os.path.join(CFG_PREFIX, 'var', 'data', 'comments',
                                   str(recID), str(comid), target_name)
        os.renames(source_path, destination)
def get_attached_files(recid, comid):
    """
    List the files attached to a comment as tuples
    (filename, filepath, fileurl). An empty list is returned when the
    storage directory of the comment does not exist.
    @param recid: the recid to which the comment belong
    @param comid: the commment id for which we want to retrieve files
    """
    storage_dir = os.path.join(CFG_PREFIX, 'var', 'data', 'comments',
                               str(recid), str(comid))
    if not os.path.isdir(storage_dir):
        return []
    url_prefix = CFG_SITE_URL + '/record/' + str(recid) + '/comments/attachments/get/' + str(comid) + '/'
    attachments = []
    for name in os.listdir(storage_dir):
        attachments.append((name,
                            os.path.join(storage_dir, name),
                            url_prefix + name))
    return attachments
def subscribe_user_to_discussion(recID, uid):
    """
    Subscribe a user to a discussion, so that she receives by email
    all new comments for this record.
    @param recID: record ID corresponding to the discussion we want to
                  subscribe the user
    @param uid: user id
    @return 1 if the subscription was stored, 0 if the insertion failed
    """
    query = """INSERT INTO cmtSUBSCRIPTION (id_bibrec, id_user, creation_time)
VALUES (%s, %s, %s)"""
    params = (recID, uid, convert_datestruct_to_datetext(time.localtime()))
    try:
        run_sql(query, params)
    except Exception:
        # Best effort: any database error is reported as a plain failure.
        # 'Exception' rather than a bare except, so that interpreter
        # exits and keyboard interrupts are not swallowed.
        return 0
    return 1
def unsubscribe_user_from_discussion(recID, uid):
    """
    Unsubscribe a user from a discussion.
    @param recID: record ID corresponding to the discussion we want to
                  unsubscribe the user
    @param uid: user id
    @return 1 if successful (the query result is > 0), 0 if not
    """
    query = """DELETE FROM cmtSUBSCRIPTION
WHERE id_bibrec=%s AND id_user=%s"""
    params = (recID, uid)
    try:
        res = run_sql(query, params)
    except Exception:
        # Best effort: any database error is reported as a plain failure.
        # 'Exception' rather than a bare except, so that interpreter
        # exits and keyboard interrupts are not swallowed.
        return 0
    if res > 0:
        return 1
    return 0
def get_user_subscription_to_discussion(recID, uid):
    """
    Tell how the given user is subscribed to the discussion of a
    record. Authorizations are not re-checked here (for eg. a user
    who subscribed but is suddenly no longer authorized is still
    reported as subscribed).
    @param recID: record ID
    @param uid: user id
    @return:
      - 0 if user is not subscribed to discussion
      - 1 if user is subscribed, and is allowed to unsubscribe
      - 2 if user is subscribed, but cannot unsubscribe
    """
    user_email = get_email(uid)
    (removable, permanent) = get_users_subscribed_to_discussion(recID, check_authorizations=False)
    # The list of users who can unsubscribe is looked at first
    for status, addresses in ((1, removable), (2, permanent)):
        if user_email in addresses:
            return status
    return 0
def get_users_subscribed_to_discussion(recID, check_authorizations=True):
    """
    Collect the emails of the users subscribed to a discussion.

    Two lists are returned: first the emails of users who can
    unsubscribe from the discussion, then the emails of users who
    cannot unsubscribe (for eg. author of the document, etc).

    An email appears in only one list. A user who has manually
    subscribed AND is an automatic recipient for updates ends up in
    the second list only.

    When check_authorizations is True, a manual subscriber who is no
    longer allowed to view the comments gets unsubscribed on the way.

    @param recID: record ID for which we want to retrieve subscribed users
    @param check_authorizations: if True, check again if users are authorized to view comment
    @return tuple (emails1, emails2)
    """
    # email -> True when the user may unsubscribe, False otherwise
    unsubscribe_allowed = {}

    # 1. Users that have subscribed to this discussion by themselves
    rows = run_sql("""SELECT id_user FROM cmtSUBSCRIPTION WHERE id_bibrec=%s""", (recID,))
    for row in rows:
        uid = row[0]
        if check_authorizations:
            user_info = collect_user_info(uid)
            (auth_code, auth_msg) = check_user_can_view_comments(user_info, recID)
        else:
            # Don't check and grant access
            auth_code = False
        if auth_code:
            # User is no longer authorized to view comments.
            # Delete subscription
            unsubscribe_user_from_discussion(recID, uid)
            continue
        email = get_email(uid)
        if '@' in email:
            unsubscribe_allowed[email] = True

    # 2. Users automatically subscribed, based on the record metadata.
    #    These entries overwrite manual subscriptions with the same email.
    for collection, fields in CFG_WEBCOMMENT_EMAIL_REPLIES_TO.items():
        if get_colID(collection) is None:
            continue
        if recID not in get_collection_reclist(collection):
            continue
        for field in fields:
            for email in get_fieldvalues(recID, field):
                if '@' not in email:
                    # Is a group: add domain name
                    email = email + '@' + CFG_SITE_SUPPORT_EMAIL.split('@')[1]
                unsubscribe_allowed[email] = False

    removable = []
    permanent = []
    for email, can_unsubscribe_p in unsubscribe_allowed.items():
        if can_unsubscribe_p:
            removable.append(email)
        else:
            permanent.append(email)
    return (removable, permanent)
def email_subscribers_about_new_comment(recID, reviews, emails1,
                                        emails2, comID, msg="",
                                        note="", score=0,
                                        editor_type='textarea',
                                        ln=CFG_SITE_LANG, uid=-1):
    """
    Tell the subscribers of a discussion that a new comment was posted.

    Two mails are prepared: one for the people who can unsubscribe
    (emails1) and one for those who were automatically subscribed and
    cannot (emails2). They differ by header and footer only.

    FIXME: consider recipient preference to send email in correct language.

    @param recID: record id
    @param reviews: true value when the new item is a review
    @param emails1: list of emails for users who can unsubscribe from discussion
    @param emails2: list of emails for users who cannot unsubscribe from discussion
    @param comID: the comment id
    @param msg: comment body
    @param note: comment title
    @param score: review star score
    @param editor_type: the kind of editor used to submit the comment: 'textarea', 'fckeditor'
    @param ln: language
    @param uid: user id, handed over to the header template
    @return: 0 when there is nobody to notify; otherwise the combined
             result of the send_email() calls (true if all went okay)
    """
    _ = gettext_set_language(ln)
    if not emails1 and not emails2:
        return 0

    # Title: usual title first, conference title as fallback
    for title_tag in ("245__a", "111__a"):
        titles = get_fieldvalues(recID, title_tag)
        if titles:
            break
    if titles:
        title = titles[0]
    else:
        title = _("Record %i") % recID

    # Report number: first tag that gives something
    for report_tag in ("037__a", "088__a", "021__a"):
        report_numbers = get_fieldvalues(recID, report_tag)
        if report_numbers:
            break

    # Email subject and body
    if report_numbers:
        report_number_prefix = '[' + report_numbers[0] + '] '
    else:
        report_number_prefix = ''
    if reviews:
        email_subject = _('%(report_number)s"%(title)s" has been reviewed') % \
                        {'report_number': report_number_prefix,
                         'title': title}
    else:
        email_subject = _('%(report_number)s"%(title)s" has been commented') % \
                        {'report_number': report_number_prefix,
                         'title': title}

    washer = EmailWasher()
    msg = washer.wash(msg)
    email_content = msg
    if note:
        email_content = note + email_content

    # First the people who can unsubscribe, then the people who have
    # been automatically subscribed to the discussion (they cannot
    # unsubscribe).
    outcomes = []
    for recipients, can_unsubscribe in ((emails1, True), (emails2, False)):
        email_header = webcomment_templates.tmpl_email_new_comment_header(recID,
                                                                          title,
                                                                          reviews,
                                                                          comID,
                                                                          report_numbers,
                                                                          can_unsubscribe=can_unsubscribe,
                                                                          ln=ln,
                                                                          uid=uid)
        email_footer = webcomment_templates.tmpl_email_new_comment_footer(recID,
                                                                          title,
                                                                          reviews,
                                                                          comID,
                                                                          report_numbers,
                                                                          can_unsubscribe=can_unsubscribe,
                                                                          ln=ln)
        sent = True
        if recipients:
            sent = send_email(fromaddr=CFG_WEBCOMMENT_ALERT_ENGINE_EMAIL,
                              toaddr=recipients,
                              subject=email_subject,
                              content=email_content,
                              header=email_header,
                              footer=email_footer,
                              ln=ln)
        outcomes.append(sent)
    return outcomes[0] and outcomes[1]
def get_record_status(recid):
    """
    Returns the current status of the record, i.e. current restriction to apply for newly submitted
    comments, and current commenting round.

    Both values are read from the record metadata: the commenting
    round from the field configured in CFG_WEBCOMMENT_ROUND_DATAFIELD
    and the restriction from the field configured in
    CFG_WEBCOMMENT_RESTRICTION_DATAFIELD, each for the first
    configured collection the record belongs to. The first value of
    the field is used.

    The restriction is empty string "" in cases where the restriction
    has not explicitly been set, even if the record itself is
    restricted. The commenting round is likewise "" when not found.

    @param recid: the record id
    @type recid: int
    @return tuple(restriction, round_name), where 'restriction' is empty string when no restriction applies
    @rtype (string, string) - presumably; values come from get_fieldvalues(), to be confirmed
    """
    collections_with_rounds = CFG_WEBCOMMENT_ROUND_DATAFIELD.keys()
    commenting_round = ""
    for collection in collections_with_rounds:
        # Find the first collection that defines rounds field for this
        # record
        if get_colID(collection) is not None and \
               (recid in get_collection_reclist(collection)):
            commenting_rounds = get_fieldvalues(recid, CFG_WEBCOMMENT_ROUND_DATAFIELD.get(collection, ""))
            if commenting_rounds:
                commenting_round = commenting_rounds[0]
            # NOTE(review): stop at the first matching collection, as the
            # comment above says - nesting rebuilt from that comment, confirm
            break
    collections_with_restrictions = CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.keys()
    restriction = ""
    for collection in collections_with_restrictions:
        # Find the first collection that defines restriction field for
        # this record
        if get_colID(collection) is not None and \
               recid in get_collection_reclist(collection):
            restrictions = get_fieldvalues(recid, CFG_WEBCOMMENT_RESTRICTION_DATAFIELD.get(collection, ""))
            if restrictions:
                restriction = restrictions[0]
            # NOTE(review): same nesting assumption as in the loop above
            break
    return (restriction, commenting_round)
def calculate_start_date(display_since):
    """
    Private function
    Returns the datetime of display_since argument in MYSQL datetime format
    calculated according to the local time.
    @param display_since: 'all' = no filtering
                          nd = n days ago
                          nw = n weeks ago
                          nm = n months ago
                          ny = n years ago
                          where n is an integer
    @return: string of wanted datetime ('YYYY-MM-DD HH:MM:SS').
             If 'all' given as argument, will return datetext_default
             datetext_default is defined in miscutils/lib/dateutils and
             equals 0000-00-00 00:00:00 => MySQL format
             If bad argument given, will return datetext_default
    """
    # Function-scope import: only this function needs the calendar
    from calendar import monthrange

    # Expected shape is "<integer><unit letter>". Anything else (None,
    # 'all', garbage) cannot be parsed and means "no filtering".
    try:
        nb = int(display_since[:-1])
        time_type = str(display_since[-1])
    except (TypeError, ValueError, IndexError):
        return datetext_default
    if time_type not in ('d', 'w', 'm', 'y'):
        return datetext_default

    today = datetime.today()

    if time_type in ('d', 'w'):
        try:
            if time_type == 'd':
                delta = timedelta(days=nb)
            else:
                delta = timedelta(weeks=nb)
            start = today - delta
        except OverflowError:
            # Out of the range datetime can represent
            return datetext_default
    else:
        if time_type == 'y':
            if nb < 1 or nb > today.year - 1:
                # only 1 <= nb years < current year is accepted
                return datetext_default
            months_back = nb * 12
        else:
            months_back = nb
        # Work on a plain count of months so that crossing one or more
        # year boundaries needs no special casing
        # (ex: july(07) - 9 months = october of the previous year).
        year, month_index = divmod(today.year * 12 + (today.month - 1) - months_back, 12)
        if year < datetime.min.year or year > datetime.max.year:
            return datetext_default
        month = month_index + 1
        # The 31st (or Feb 29th) may not exist in the target month:
        # fall back to the last day of that month.
        day = min(today.day, monthrange(year, month)[1])
        start = today.replace(year=year, month=month, day=day)

    return start.strftime("%Y-%m-%d %H:%M:%S")
def count_comments(recID):
    """
    Count the comments of a record, i.e. the rows of cmtRECORDCOMMENT
    attached to the record whose star score is 0.
    """
    record_id = int(recID)
    rows = run_sql("""SELECT count(id) FROM cmtRECORDCOMMENT
WHERE id_bibrec=%s AND star_score=0""", (record_id,))
    first_row = rows[0]
    return first_row[0]
def count_reviews(recID):
    """
    Count the reviews of a record, i.e. the rows of cmtRECORDCOMMENT
    attached to the record whose star score is above 0.
    """
    record_id = int(recID)
    rows = run_sql("""SELECT count(id) FROM cmtRECORDCOMMENT
WHERE id_bibrec=%s AND star_score>0""", (record_id,))
    first_row = rows[0]
    return first_row[0]
def get_first_comments_or_remarks(recID=-1,
                                  ln=CFG_SITE_LANG,
                                  nb_comments='all',
                                  nb_reviews='all',
                                  voted=-1,
                                  reported=-1,
                                  user_info=None):
    """
    Gets nb number comments/reviews or remarks.
    In the case of comments, will get both comments and reviews
    Comments and remarks sorted by most recent date, reviews sorted by highest helpful score
    @param recID: record id
    @param ln: language
    @param nb_comments: number of comment or remarks to get
    @param nb_reviews: number of reviews or remarks to get
    @param voted: 1 if user has voted for a remark
    @param reported: 1 if user has reported a comment or review
    @param user_info: user info object, handed over to the retrieval of comments
    @return: if comment, tuple (comments, reviews) both being html of first nb comments/reviews
             if remark, tuple (remarks, None)
             empty tuple if recID is not an integer
    """
    warnings = []
    errors = []
    voted = wash_url_argument(voted, 'int')
    reported = wash_url_argument(reported, 'int')

    ## check recID argument
    if type(recID) is not int:
        return ()

    # Defaults, so that the remark branch at the bottom never reads
    # names that were not assigned.
    first_res_comments = ()
    nb_res_comments = 0

    if recID >= 1: #comment or review. NB: suppressed reference to basket (handled in webbasket)
        if CFG_WEBCOMMENT_ALLOW_REVIEWS:
            # Reviews are limited by nb_reviews (not by nb_comments)
            res_reviews = query_retrieve_comments_or_remarks(recID=recID, display_order="hh", ranking=1,
                                                             limit=nb_reviews, user_info=user_info)
            nb_res_reviews = len(res_reviews)
            ## check nb argument
            if type(nb_reviews) is int and nb_reviews < len(res_reviews):
                first_res_reviews = res_reviews[:nb_reviews]
            else:
                first_res_reviews = res_reviews
        if CFG_WEBCOMMENT_ALLOW_COMMENTS:
            # Comments are limited by nb_comments (not by nb_reviews)
            res_comments = query_retrieve_comments_or_remarks(recID=recID, display_order="od", ranking=0,
                                                              limit=nb_comments, user_info=user_info)
            nb_res_comments = len(res_comments)
            ## check nb argument
            if type(nb_comments) is int and nb_comments < len(res_comments):
                first_res_comments = res_comments[:nb_comments]
            else:
                first_res_comments = res_comments
    else: #error
        errors.append(('ERR_WEBCOMMENT_RECID_INVALID', recID)) #!FIXME dont return error anywhere since search page

    # comment
    if recID >= 1:
        comments = reviews = ""
        if reported > 0:
            warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',))
        elif reported == 0:
            warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',))
        if CFG_WEBCOMMENT_ALLOW_COMMENTS: # normal comments
            grouped_comments = group_comments_by_round(first_res_comments, ranking=0)
            comments = webcomment_templates.tmpl_get_first_comments_without_ranking(recID, ln, grouped_comments, nb_res_comments, warnings)
        if CFG_WEBCOMMENT_ALLOW_REVIEWS: # ranked comments
            #calculate average score
            avg_score = calculate_avg_score(res_reviews)
            if voted > 0:
                warnings.append(('WRN_WEBCOMMENT_FEEDBACK_RECORDED_GREEN_TEXT',))
            elif voted == 0:
                warnings.append(('WRN_WEBCOMMENT_FEEDBACK_NOT_RECORDED_RED_TEXT',))
            # Rows were fetched with ranking=1, so the round name has to
            # be read at the position used for ranked rows.
            grouped_reviews = group_comments_by_round(first_res_reviews, ranking=1)
            reviews = webcomment_templates.tmpl_get_first_comments_with_ranking(recID, ln, grouped_reviews, nb_res_reviews, avg_score, warnings)
        return (comments, reviews)
    # remark
    else:
        return(webcomment_templates.tmpl_get_first_remarks(first_res_comments, ln, nb_res_comments), None)
def group_comments_by_round(comments, ranking=0):
    """
    Group comments by the round to which they belong.

    The groups are returned in the order in which each round name is
    first met in 'comments', and comments keep their relative order
    inside a group.

    @param comments: rows describing comments. The round name is read
                     at index 11 when 'ranking' is true, at index 7
                     otherwise.
    @param ranking: true value if the rows are ranked comments (reviews)
    @return: list of tuples (round name, list of comments of that round)
    """
    if ranking:
        round_index = 11
    else:
        round_index = 7
    comment_rounds = {}
    ordered_comment_round_names = []
    for comment in comments:
        # Index explicitly: the 'x and a or b' idiom would fall back to
        # the other column whenever the round name is empty.
        comment_round_name = comment[round_index]
        if comment_round_name not in comment_rounds:
            comment_rounds[comment_round_name] = []
            ordered_comment_round_names.append(comment_round_name)
        comment_rounds[comment_round_name].append(comment)
    return [(comment_round_name, comment_rounds[comment_round_name]) \
            for comment_round_name in ordered_comment_round_names]
def calculate_avg_score(res):
    """
    private function
    Average the star scores of the reviews found in res. Rows whose
    score is not above 0 are left out.
    @param res: tuple of tuple returned from query_retrieve_comments_or_remarks
                (the star score is read at index 6 of each row)
    @return: the average score rounded to the closest 0.5 and capped
             at 5; 0.0 when there is no review
    """
    star_scores = [row[6] for row in res if row[6] > 0]
    if not star_scores:
        return 0.0
    mean = sum(star_scores, 0.0) / len(star_scores)
    whole = math.floor(mean)
    fraction = mean - whole
    # [0, .25) -> down, [.25, .75] -> half, (.75, 1) -> up
    if fraction < 0.25:
        rounded = math.floor(mean)
    elif fraction > 0.75:
        rounded = math.floor(mean) + 1
    else:
        rounded = math.floor(mean) + 0.5
    if rounded > 5:
        rounded = 5.0
    return rounded
def perform_request_add_comment_or_remark(recID=0,
                                          uid=-1,
                                          action='DISPLAY',
                                          ln=CFG_SITE_LANG,
                                          msg=None,
                                          score=None,
                                          note=None,
                                          priority=None,
                                          reviews=0,
                                          comID=-1,
                                          client_ip_address=None,
                                          editor_type='textarea',
                                          can_attach_files=False,
                                          subscribe=False,
                                          req=None,
                                          attached_files=None,
                                          warnings=None,
                                          errors=None):
    """
    Add a comment/review or remark
    @param recID: record id
    @param uid: user id
    @param action:  'DISPLAY' to display add form
                    'SUBMIT' to submit comment once form is filled
                    'REPLY' to reply to an existing comment
                    (any other value displays the form with a warning)
    @param ln: language
    @param msg: the body of the comment/review or remark
    @param score: star score of the review
    @param note: title of the review
    @param priority: priority of remark (int)
    @param reviews: boolean, if enabled will add a review, if disabled will add a comment
    @param comID: if replying, this is the comment id of the comment we are replying to
    @param client_ip_address: IP address of the submitter
    @param editor_type: the kind of editor/input used for the comment: 'textarea', 'fckeditor'
    @param can_attach_files: if user can attach files to comments or not
    @param subscribe: if True, subscribe user to receive new comments by email
    @param req: request object. Used to register callback to send email notification
    @param attached_files: newly attached files to this comment, mapping filename to filepath
    @type attached_files: dict
    @param warnings: list of standard warnings that should be considered
    @param errors: list of standard errors that should be considered
    @return: tuple (html, errors, warnings) where html is
             - html add form if action is display or reply
             - html successful added form if action is submit
             - empty string if the request could not be served
    """
    if warnings is None:
        warnings = []
    if errors is None:
        errors = []

    _ = gettext_set_language(ln)

    ## check arguments
    check_recID_is_in_range(recID, warnings, ln)
    if uid <= 0:
        errors.append(('ERR_WEBCOMMENT_UID_INVALID', uid))
        return ('', errors, warnings)

    if attached_files is None:
        attached_files = {}

    user_contact_info = query_get_user_contact_info(uid)
    nickname = ''
    if user_contact_info:
        if user_contact_info[0]:
            nickname = user_contact_info[0]

    # show the form
    if action == 'DISPLAY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return (webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files), errors, warnings)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            return (webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files), errors, warnings)
        else:
            errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))

    elif action == 'REPLY':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            errors.append(('ERR_WEBCOMMENT_REPLY_REVIEW',))
            return (webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files), errors, warnings)
        elif not reviews and CFG_WEBCOMMENT_ALLOW_COMMENTS:
            textual_msg = msg
            if comID > 0:
                comment = query_get_comment(comID)
                if comment:
                    user_info = get_user_info(comment[2])
                    if user_info:
                        date_creation = convert_datetext_to_dategui(str(comment[4]))
                        # Build two msg: one mostly textual, the other one with HTML markup, for the FCKeditor.
                        msg = _("%(x_name)s wrote on %(x_date)s:")% {'x_name': user_info[2], 'x_date': date_creation}
                        textual_msg = msg
                        # 1 For FCKeditor input
                        # HTML line breaks here play the role of the
                        # "\n\n" used in the textual variant below
                        msg += '<br /><br />'
                        msg += comment[3]
                        msg = email_quote_txt(text=msg)
                        msg = email_quoted_txt2html(text=msg)
                        msg = '<br/>' + msg + '<br/>'
                        # 2 For textarea input
                        textual_msg += "\n\n"
                        textual_msg += comment[3]
                        textual_msg = email_quote_txt(text=textual_msg)
            return (webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, textual_msg, can_attach_files=can_attach_files, reply_to=comID), errors, warnings)
        else:
            errors.append(('ERR_WEBCOMMENT_COMMENTS_NOT_ALLOWED',))

    # check before submitting form
    elif action == 'SUBMIT':
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            # A missing (None) title counts as an empty one
            if (note is None or note.strip() in ["", "None"]) and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
                warnings.append(('WRN_WEBCOMMENT_ADD_NO_TITLE',))
            if score == 0 or score > 5:
                warnings.append(("WRN_WEBCOMMENT_ADD_NO_SCORE",))

        # A missing (None) body counts as an empty one
        if (msg is None or msg.strip() in ["", "None"]) and not CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
            warnings.append(('WRN_WEBCOMMENT_ADD_NO_BODY',))
        # if no warnings, submit
        if len(warnings) == 0:
            if reviews:
                if check_user_can_review(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID)
                else:
                    # 1-tuple, like every other warning of this function
                    warnings.append(('WRN_WEBCOMMENT_CANNOT_REVIEW_TWICE',))
                    success = 1
            else:
                if check_user_can_comment(recID, client_ip_address, uid):
                    success = query_add_comment_or_remark(reviews, recID=recID, uid=uid, msg=msg,
                                                          note=note, score=score, priority=0,
                                                          client_ip_address=client_ip_address,
                                                          editor_type=editor_type,
                                                          req=req,
                                                          reply_to=comID, attached_files=attached_files)
                    if success > 0 and subscribe:
                        subscribe_user_to_discussion(recID, uid)
                else:
                    # 1-tuple, like every other warning of this function
                    warnings.append(('WRN_WEBCOMMENT_TIMELIMIT',))
                    success = 1
            if success > 0:
                # NOTE(review): when the user was refused above, 'success'
                # is the placeholder 1 and is still used as a comment ID
                # for the admin notification - to be confirmed.
                if CFG_WEBCOMMENT_ADMIN_NOTIFICATION_LEVEL > 0:
                    notify_admin_of_new_comment(comID=success)
                return (webcomment_templates.tmpl_add_comment_successful(recID, ln, reviews, warnings, success), errors, warnings)
            else:
                errors.append(('ERR_WEBCOMMENT_DB_INSERT_ERROR',))
        # if are warnings or if inserting comment failed, show user where warnings are
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return (webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files), errors, warnings)
        else:
            return (webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files), errors, warnings)

    # unknown action send to display
    else:
        warnings.append(('WRN_WEBCOMMENT_ADD_UNKNOWN_ACTION',))
        # Same positional arguments as in the DISPLAY branch, nickname included
        if reviews and CFG_WEBCOMMENT_ALLOW_REVIEWS:
            return (webcomment_templates.tmpl_add_comment_form_with_ranking(recID, uid, nickname, ln, msg, score, note, warnings, can_attach_files=can_attach_files), errors, warnings)
        else:
            return (webcomment_templates.tmpl_add_comment_form(recID, uid, nickname, ln, msg, warnings, can_attach_files=can_attach_files), errors, warnings)

    return ('', errors, warnings)
def notify_admin_of_new_comment(comID):
    """
    Sends an email with details regarding comment with ID = comID to
    the moderators of the collection of the comment.
    Nothing is sent if the comment cannot be retrieved.
    @param comID: the ID of the comment that was just posted
    """
    comment = query_get_comment(comID)
    if not comment:
        # Nothing retrieved (empty result or None): nothing to report
        return
    (comID2,
     id_bibrec,
     id_user,
     body,
     date_creation,
     star_score, nb_votes_yes, nb_votes_total,
     title,
     nb_abuse_reports, round_name, restriction) = comment

    user_info = query_get_user_contact_info(id_user)
    if user_info:
        (nickname, email, last_login) = user_info
        if not nickname:
            # No nickname: use the local part of the email address
            nickname = email.split('@')[0]
    else:
        nickname = email = "ERROR: Could not retrieve"

    # A positive star score tells a review from a comment
    is_review = star_score > 0

    review_stuff = '''
Star score = %s
Title = %s''' % (star_score, title)

    washer = EmailWasher()
    body = washer.wash(body)

    record_info = webcomment_templates.tmpl_email_new_comment_admin(id_bibrec)
    if is_review:
        comment_or_review = 'review'
        comment_or_review_caps = 'REVIEW'
        comments_or_reviews = 'reviews'
    else:
        comment_or_review = 'comment'
        comment_or_review_caps = 'COMMENT'
        comments_or_reviews = 'comments'
        review_stuff = ""
    out = '''
The following %(comment_or_review)s has just been posted (%(date)s).
AUTHOR:
Nickname = %(nickname)s
Email = %(email)s
User ID = %(uid)s
RECORD CONCERNED:
Record ID = %(recID)s
URL = <%(siteurl)s/record/%(recID)s/%(comments_or_reviews)s/>
%(record_details)s
%(comment_or_review_caps)s:
%(comment_or_review)s ID = %(comID)s %(review_stuff)s
Body =
<--------------->
%(body)s
<--------------->
ADMIN OPTIONS:
To moderate the %(comment_or_review)s go to %(siteurl)s/record/%(recID)s/%(comments_or_reviews)s/display?%(arguments)s
''' % \
        {   'comment_or_review'     : comment_or_review,
            'comment_or_review_caps': comment_or_review_caps,
            'comments_or_reviews'   : comments_or_reviews,
            'date'                  : date_creation,
            'nickname'              : nickname,
            'email'                 : email,
            'uid'                   : id_user,
            'recID'                 : id_bibrec,
            'record_details'        : record_info,
            'comID'                 : comID2,
            'review_stuff'          : review_stuff,
            # The mail is plain text: HTML line breaks become linefeeds
            'body'                  : body.replace('<br />','\n'),
            'siteurl'               : CFG_SITE_URL,
            'arguments'             : 'ln=en&do=od#%s' % comID
        }

    from_addr = '%s WebComment <%s>' % (CFG_SITE_NAME, CFG_WEBALERT_ALERT_ENGINE_EMAIL)
    comment_collection = get_comment_collection(comID)
    to_addrs = get_collection_moderators(comment_collection)

    rec_collection = guess_primary_collection_of_a_record(id_bibrec)
    # Build a new list rather than extending in place the one handed
    # back by get_fieldvalues()
    report_nums = list(get_fieldvalues(id_bibrec, "037__a")) + \
                  list(get_fieldvalues(id_bibrec, "088__a"))
    report_nums = ', '.join(report_nums)
    subject = "A new comment/review has just been posted [%s|%s]" % (rec_collection, report_nums)

    send_email(from_addr, to_addrs, subject, out)
def check_recID_is_in_range(recID, warnings=None, ln=CFG_SITE_LANG):
    """
    Check that recID is a positive integer for which record_exists()
    answers 1.
    Append error messages to the warnings list.
    @param recID: record id
    @param warnings: the warnings list of the calling function. A new
                     list is used when omitted; a value that is not a
                     list is wrapped into one.
    @param ln: language
    @return: tuple (boolean, html) where boolean (1=true, 0=false)
             and html is the body of the page to display if there was a problem
    """
    # Make warnings into a list if needed (a fresh one per call when
    # the argument is omitted)
    if warnings is None:
        warnings = []
    elif type(warnings) is not list:
        warnings = [warnings]

    try:
        recID = int(recID)
    except (TypeError, ValueError):
        # Not a number: reported below as 'nan'
        pass

    if type(recID) is int:
        if recID > 0:
            from invenio.search_engine import record_exists
            success = record_exists(recID)
            if success == 1:
                return (1,"")
            else:
                warnings.append(('ERR_WEBCOMMENT_RECID_INEXISTANT', recID))
                return (0, webcomment_templates.tmpl_record_not_found(status='inexistant', recID=recID, ln=ln))
        elif recID == 0:
            warnings.append(('ERR_WEBCOMMENT_RECID_MISSING',))
            return (0, webcomment_templates.tmpl_record_not_found(status='missing', recID=recID, ln=ln))
        else:
            warnings.append(('ERR_WEBCOMMENT_RECID_INVALID', recID))
            return (0, webcomment_templates.tmpl_record_not_found(status='invalid', recID=recID, ln=ln))
    else:
        warnings.append(('ERR_WEBCOMMENT_RECID_NAN', recID))
        return (0, webcomment_templates.tmpl_record_not_found(status='nan', recID=recID, ln=ln))
def check_int_arg_is_in_range(value, name, errors, gte_value, lte_value=None):
    """
    Check that variable with name 'name' >= gte_value and optionally <= lte_value
    Append error messages to errors list
    @param value: variable value
    @param name: variable name (not used in the messages)
    @param errors: list of error tuples (error_id, value)
    @param gte_value: greater than or equal to value
    @param lte_value: less than or equal to value. None (default) for
                      no upper bound.
    @return: boolean (1=true, 0=false)
    """
    # Make errors into a list if needed
    if type(errors) is not list:
        errors = [errors]

    # A bound that is not an integer is a mistake of the calling code
    if type(gte_value) is not int:
        errors.append(('ERR_WEBCOMMENT_PROGRAMNING_ERROR',))
        return 0
    if lte_value is not None and type(lte_value) is not int:
        errors.append(('ERR_WEBCOMMENT_PROGRAMNING_ERROR',))
        return 0

    # A value that is not an integer is a bad argument
    if type(value) is not int:
        errors.append(('ERR_WEBCOMMENT_ARGUMENT_NAN', value))
        return 0

    if value < gte_value:
        errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
        return 0
    # 'is not None', so that an upper bound of 0 is enforced too
    if lte_value is not None and value > lte_value:
        errors.append(('ERR_WEBCOMMENT_ARGUMENT_INVALID', value))
        return 0
    return 1
def get_mini_reviews(recid, ln=CFG_SITE_LANG):
    """
    Build the mini-panel review controls shown on the detailed record pages.
    @param recid: the id of the displayed record
    @param ln: the user's language
    @return: the output of webcomment_templates.tmpl_mini_review
    """
    # The panel lets the user submit a review only when short reviews are
    # enabled; otherwise it just displays the current score.
    action = 'DISPLAY'
    if CFG_WEBCOMMENT_ALLOW_SHORT_REVIEWS:
        action = 'SUBMIT'

    record_reviews = query_retrieve_comments_or_remarks(recid, ranking=1)
    average = calculate_avg_score(record_reviews)
    total = len(record_reviews)
    return webcomment_templates.tmpl_mini_review(recid, ln, action=action,
                                                 avg_score=average,
                                                 nb_comments_total=total)
def check_user_can_view_comments(user_info, recid):
    """Tell whether the user may view the comments attached to recid.

    The record itself must be viewable first; only then is the
    'viewcomment' action checked for the primary collection of the record.

    Returns the same type as acc_authorize_action
    """
    (record_code, record_msg) = check_user_can_view_record(user_info, recid)
    if not record_code:
        # The record is visible: is there an authorization for this
        # action in the collection the record belongs to?
        primary_collection = guess_primary_collection_of_a_record(recid)
        return acc_authorize_action(user_info, 'viewcomment', authorized_if_no_roles=True, collection=primary_collection)
    # A non-zero code means the record itself is not viewable
    return (record_code, record_msg)
def check_user_can_view_comment(user_info, comid, restriction=None):
    """Tell whether the user may view one particular comment, based on
    the restriction set on that comment.

    The restriction of the record itself is NOT checked here, although
    a user who cannot see the record should not see the comment either.

    When 'restriction' is given, 'comid' is not looked up.

    @param user_info: the user info object
    @param comid: the comment id of that we want to check
    @param restriction: the restriction applied to given comment (if known. Otherwise retrieved automatically)
    @return: the same type as acc_authorize_action
    """
    if restriction is None:
        comment_row = query_get_comment(comid)
        if not comment_row:
            return (1, 'Comment %i does not exist' % comid)
        # the restriction is read from position 11 of the comment row
        restriction = comment_row[11]

    if restriction != "":
        return acc_authorize_action(user_info, 'viewrestrcomment', status=restriction)
    # an empty restriction means the comment is not restricted
    return (0, '')
def check_user_can_send_comments(user_info, recid):
    """Tell whether the user may comment on the given recid.

    Whether the user can view the record or its comments is not
    checked here.

    Returns the same type as acc_authorize_action
    """
    # The 'sendcomment' action is checked against the primary
    # collection of the record.
    return acc_authorize_action(user_info,
                                'sendcomment',
                                authorized_if_no_roles=True,
                                collection=guess_primary_collection_of_a_record(recid))
def check_user_can_attach_file_to_comments(user_info, recid):
    """Tell whether the user may attach a file to comments of the
    given recid.

    Whether the user can view or send comments is not checked here.

    Returns the same type as acc_authorize_action
    """
    # The 'attachcommentfile' action is checked against the primary
    # collection of the record; unlike the sibling checks it is called
    # with authorized_if_no_roles=False.
    return acc_authorize_action(user_info,
                                'attachcommentfile',
                                authorized_if_no_roles=False,
                                collection=guess_primary_collection_of_a_record(recid))
diff --git a/modules/websearch/lib/search_engine_query_parser.py b/modules/websearch/lib/search_engine_query_parser.py
index a6f289fdd..d3c6ed4d6 100644
--- a/modules/websearch/lib/search_engine_query_parser.py
+++ b/modules/websearch/lib/search_engine_query_parser.py
@@ -1,993 +1,993 @@
# -*- coding: utf-8 -*-
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
# pylint: disable-msg=C0301
"""CDS Invenio Search Engine query parsers."""
import re
import string
from invenio.bibindex_engine_tokenizer import BibIndexFuzzyNameTokenizer as FNT
NameScanner = FNT()
class SearchQueryParenthesisedParser:
    """Parse search queries containing parentheses.

    Current implementation is a simple linear parsing that does not support
    nested parentheses and priority of operators.

    In case there is a need for nested parentheses and priority of operators,
    the current implementation can be replaced by one that uses expression
    trees as they are more or less a standard way for parsing expressions.

    The method doing the main work is parse_query()

    Input: parse_query("ellis AND (muon OR kaon)")
    Output: ['+', 'ellis', '+', 'muon | kaon'], i.e. a list of
            [operator1, expression1, operator2, expression2, ..., expressionN].
    In case of error: InvenioWebSearchQueryParserException is raised
    """

    # all operator symbols recognized in expression
    _operators = ['+', '|', '-']

    # error messages
    _error_message_mismatched_parentheses = "Mismatched parenthesis."
    _error_message_nested_parentheses_not_supported = "Nested parenthesis are currently not supported."

    def __init__(self, default_operator='+'):
        """Initialize the state of the parser

        @param default_operator: operator used when none is given
            between two patterns
        """
        # default operator to be used if operator is missing between patterns
        self._DEFAULT_OPERATOR = default_operator

        # matches a quoted string (single or double quotes) together with
        # the non-backslash character that precedes the opening quote
        self._re_quotes_match = re.compile('[^\\\\](".*?[^\\\\]")|[^\\\\](\'.*?[^\\\\]\')')

        self._query = ''
        # list of parsed patterns and operators
        self._patterns = []
        # indexes in the parsed query of beginning and end of currently parsed pattern
        self._pattern_beginning = 0
        self._pattern_end = 0

        # operators before and after the current pattern matched during
        # parsing.  They have to be instance attributes: the helper methods
        # read them through self.
        self._preceding_operator = ""
        self._preceding_operator_position = -1
        self._following_operator = ""
        self._following_operator_position = -1

        # flag indicating if processed symbols are inside parenthesis
        self._inside_parentheses = False

    def _reset_parse_state(self):
        """Clean up state from any previous parse operations."""
        # FIXME: Can this be done away with?  Is parse_query overly stateful?
        self._patterns = []
        self._pattern_beginning = 0
        self._pattern_end = 0
        self._clear_preceding_operator()
        self._clear_following_operator()
        self._inside_parentheses = False

    def _clean_query(self, query):
        """Clean the query performing replacement of AND, OR, NOT operators with their
        equivalents +, |, -

        Text matched by the quotes regular expression is copied verbatim.
        """
        # result of the replacement
        result = ""
        current_position = 0

        for match in self._re_quotes_match.finditer(query):
            # clean the content after the previous quotes and before current quotes
            cleanable_content = query[current_position : match.start()]
            cleanable_content = self._clean_operators(cleanable_content)

            # get the content in the quotes
            quoted_content = match.group(0)

            # append the processed content to the result
            result = result + cleanable_content + quoted_content

            # move current position at the end of the processed content
            current_position = match.end()

        # clean the content from the last appearance of quotes till the end of the query
        cleanable_content = query[current_position : len(query)]
        cleanable_content = self._clean_operators(cleanable_content)
        result = result + cleanable_content

        return result

    def _clean_operators(self, query = ""):
        """Replaces some of the content of the query with equivalent content
        (e.g. replace 'AND' operator with '+' operator) for easier processing after that."""
        for word, symbol in (('not', '-'), ('and', '+'), ('or', '|')):
            query = re.sub('(?i)\\b'+word+'\\b', symbol, query)
        return query

    def parse_query(self, query=""):
        """Parses the query and generates as an output a list of values
        containing a sequence of patterns and operators
        [operator1, pattern1, operator2, pattern2, operator3, ..., operatorN, patternN]

        Every pattern is either sequence of search terms and operators
        inside parenthesis or sequence of such terms and operators that are
        outside parenthesis. Operators in the list are these operators that
        are between pattern in parenthesis and pattern that is not in parenthesis

        @param query: the query to parse
        @return: list of operators and patterns; a query without any
            parenthesis is returned as [default operator, query]
        @raise InvenioWebSearchQueryParserException: on nested or
            mismatched parentheses
        """
        # if the query does not contain parentheses we just return it
        if not self._hasQueryParentheses(query):
            # we add the default operator in front of the query
            return [self._DEFAULT_OPERATOR, query]

        # clean the query replacing some of the content e.g. replace 'AND' with '+'
        query = self._clean_query(query)
        self._query = query

        self._reset_parse_state()

        # flag indicating if we are inside quotes
        inside_quotes = False
        # used for detecting escape sequences. Contains previously processed character.
        previous_character = ""
        # quotes that are recognized
        quotes_symbols = ['"', "'"]
        # contains the quotes symbol when we are between quotes
        current_quotes_symbol = ""

        # iterate through every character in query and perform appropriate action
        for current_index in range(0, len(self._query)):
            character = self._query[current_index]
            # end of the pattern is the current character, which is not included
            self._pattern_end = current_index

            # include all the characters between quotes in the pattern without special processing
            if inside_quotes and character != current_quotes_symbol:
                # still remember the character: without this a backslash
                # inside quotes is never seen and an escaped quote would
                # wrongly close the quoted string
                previous_character = character
                continue

            # process the quotes if they are not escaped
            if character in quotes_symbols and previous_character != '\\':
                # if we are not inside this should be a beginning of the quotes
                if not inside_quotes:
                    inside_quotes = True
                    current_quotes_symbol = character
                # in case we are inside quotes this is the closing quote
                elif inside_quotes and character == current_quotes_symbol:
                    inside_quotes = False
                    current_quotes_symbol = ""
                # a quote belongs to a pattern like any other non-blank
                # character, so assign values to operators if necessary
                self._assign_default_values_for_operators_if_necessary()
            elif '(' == character and previous_character != '\\':
                self._handle_open_parenthesis()
            elif ')' == character and previous_character != '\\':
                self._handle_close_parenthesis()
            elif character in self._operators:
                self._handle_operator(current_index)
            else:
                self._handle_non_white_space_characters(current_index)

            # hold the previous character to use it when checking for escape sequences
            previous_character = character

        # as far as patterns are appended when reaching parenthesis we should ensure that we append the last pattern
        self._append_last_pattern()

        # check for mismatched parentheses
        if self._inside_parentheses:
            raise InvenioWebSearchQueryParserException("Mismatched parenthesis.")

        return self._patterns

    def _hasQueryParentheses(self, query=""):
        """Check if the query contain parentheses inside."""
        return '(' in query or ')' in query

    def _handle_open_parenthesis(self):
        """Process opening parenthesis in the query."""

        # check if we are already inside parentheses
        if self._inside_parentheses:
            raise InvenioWebSearchQueryParserException("Nested parentheses currently unsupported.")

        # both operators preceding and following the pattern before parenthesis
        # are known and also the pattern itself so append them to the result list.
        self._append_preceding_operator()
        self._append_pattern()
        self._append_following_operator()

        # mark that we are inside parenthesis
        self._inside_parentheses = True

        # clear operators because they are already in the result.
        self._clear_preceding_operator()
        self._clear_following_operator()

    def _handle_close_parenthesis(self):
        """Process closing parenthesis in the query."""

        # check if we are inside parentheses
        if not self._inside_parentheses:
            raise InvenioWebSearchQueryParserException("Mismatched parenthesis.")

        # append the pattern between the parentheses
        self._append_pattern()
        # mark that we are not inside parenthesis any more
        self._inside_parentheses = False

    def _handle_operator(self, operator_position):
        """Process operator symbols in the query.

        @param operator_position: index of the operator in the query
        """
        # operators inside parentheses are part of the pattern
        if self._inside_parentheses:
            return

        operator = self._query[operator_position]

        # if there is no preceding operator that means that this is the first
        # appearance of an operator after closing parenthesis so we assign
        # the value to the preceding operator
        if self._preceding_operator == "":
            self._preceding_operator = operator
            self._preceding_operator_position = operator_position
            # move the beginning of the pattern after the operator
            self._pattern_beginning = operator_position + 1

            # if this is the operator preceding the query, we are not supposed
            # to know the following operator because we are parsing beginning
            self._clear_following_operator()

        # if the preceding operator is assigned then this operator is currently
        # the following operator of the pattern. A later operator will replace it
        else:
            self._following_operator = operator
            self._following_operator_position = operator_position

    def _handle_non_white_space_characters(self, character_postition):
        """Process all non white characters that are not operators, quotes
        or parenthesis and are not inside parenthesis or quotes

        @param character_postition: index of the character in the query
        """
        character = self._query[character_postition]

        # if the character is white space or we are in parentheses we skip processing
        if character.isspace() or self._inside_parentheses:
            return

        self._assign_default_values_for_operators_if_necessary()

    def _assign_default_values_for_operators_if_necessary(self):
        """Assign default values for preceding or following operators if this is necessary."""

        # if the preceding operator is empty that means we are starting to parse a new
        # pattern but there is no operator in front of it. In this case assign default
        # value to preceding operator
        if self._preceding_operator == "":
            self._preceding_operator = self._DEFAULT_OPERATOR
            self._preceding_operator_position = -1
        # otherwise we are now parsing a pattern and can assign current value for following operator
        else:
            # default operator is assigned as a value and will be changed next
            # time operator character is reached
            self._following_operator = self._DEFAULT_OPERATOR
            self._following_operator_position = -1

    def _append_pattern(self):
        """Appends the currently parsed pattern to the list with results"""
        begin = self._calculate_pattern_beginning()
        end = self._calculate_pattern_end()

        current_pattern = self._query[begin : end]
        current_pattern = current_pattern.strip()

        # don't append empty patterns
        if current_pattern != "":
            self._patterns.append(current_pattern)

        # move the beginning of next pattern at the end of current pattern
        self._pattern_beginning = self._pattern_end+1

    def _append_preceding_operator(self):
        """Appends the operator preceding current pattern to the list with results."""
        if self._preceding_operator != "":
            self._patterns.append(self._preceding_operator)
        else:
            self._patterns.append(self._DEFAULT_OPERATOR)

    def _append_following_operator(self):
        """Appends the operator following current pattern to the list with results."""
        if self._following_operator != "":
            self._patterns.append(self._following_operator)

    def _append_last_pattern(self):
        """Appends the last pattern from the query to the list with results.
        Operator preceding this pattern is also appended."""
        self._pattern_end = self._pattern_end+1
        self._append_preceding_operator()
        self._append_pattern()

        # if the last pattern was empty but default preceding operator
        # is appended, then clean it
        if self._patterns[-1] == self._DEFAULT_OPERATOR:
            self._patterns = self._patterns[0 : -1] # remove last element

    def _calculate_pattern_beginning(self):
        """Calculates exact beginning of a pattern taking in mind positions of
        operator preceding the pattern."""
        # if there is an operator character before the pattern it should not be
        # included in the pattern
        if self._pattern_beginning < self._preceding_operator_position:
            return self._preceding_operator_position + 1

        return self._pattern_beginning

    def _calculate_pattern_end(self):
        """Calculates exact end of a pattern taking in mind positions of
        operator following the pattern."""
        # if there is an operator character after the pattern it should not be
        # included in the pattern
        if self._pattern_end > self._following_operator_position and self._following_operator_position != -1:
            return self._following_operator_position

        return self._pattern_end

    def _clear_preceding_operator(self):
        """Cleans the value of the preceding operator"""
        self._preceding_operator = ""
        # after the value is cleaned the position is also cleaned. We accept -1 for cleaned value.
        self._preceding_operator_position = -1

    def _clear_following_operator(self):
        """Cleans the value of the following operator"""
        self._following_operator = ""
        # after the value is cleaned the position is also cleaned. We accept -1 for cleaned value.
        self._following_operator_position = -1
class InvenioWebSearchQueryParserException(Exception):
    """Exception for parsing errors."""
    def __init__(self, message):
        """Initialization.

        @param message: human readable description of the parsing error
        """
        # Initialise the base class too, so that str(exc) and exc.args
        # carry the message instead of being empty.
        Exception.__init__(self, message)
        self.message = message
class SpiresToInvenioSyntaxConverter:
"""Converts queries defined with SPIRES search syntax into queries
that use Invenio search syntax.
"""
# Constants defining fields
_DATE_ADDED_FIELD = '961__x:'
_DATE_UPDATED_FIELD = '961__c:' # FIXME: define and use dateupdate:
_DATE_FIELD = '269__c:'
_A_TAG = 'author:'
_EA_TAG = 'exactauthor:'
# Dictionary containing the matches between SPIRES keywords
# and their corresponding Invenio keywords or fields
# SPIRES keyword : Invenio keyword or field
_SPIRES_TO_INVENIO_KEYWORDS_MATCHINGS = {
# affiliation
'affiliation' : 'affiliation:',
'affil' : 'affiliation:',
'aff' : 'affiliation:',
'af' : 'affiliation:',
'institution' : 'affiliation:',
'inst' : 'affiliation:',
# any field
'any' : 'anyfield:',
# bulletin
'bb' : 'reportnumber:',
'bbn' : 'reportnumber:',
'bull' : 'reportnumber:',
'bulletin-bd' : 'reportnumber:',
'bulletin-bd-no' : 'reportnumber:',
'eprint' : 'reportnumber:',
# citation / reference
'c' : 'reference:',
'citation' : 'reference:',
'cited' : 'reference:',
'jour-vol-page' : 'reference:',
'jvp' : 'reference:',
# collaboration
'collaboration' : 'collaboration:',
'collab-name' : 'collaboration:',
'cn' : 'collaboration:',
# conference number
'conf-number' : '111__g:',
'cnum' : '111__g:',
# country
'cc' : '044__a:',
'country' : '044__a:',
# date
'date': _DATE_FIELD,
'd': _DATE_FIELD,
# date added
'date-added': _DATE_ADDED_FIELD,
'dadd': _DATE_ADDED_FIELD,
'da': _DATE_ADDED_FIELD,
# date updated
'date-updated': _DATE_UPDATED_FIELD,
'dupd': _DATE_UPDATED_FIELD,
'du': _DATE_UPDATED_FIELD,
# first author
'fa' : '100__a:',
'first-author' : '100__a:',
# author
'a' : 'author:',
'au' : 'author:',
'author' : 'author:',
'name' : 'author:',
# exact author
# this is not a real keyword match. It is pseudo keyword that
# will be replaced later with author search
'ea' : 'exactauthor:',
'exact-author' : 'exactauthor:',
# experiment
'exp' : 'experiment:',
'experiment' : 'experiment:',
'expno' : 'experiment:',
'sd' : 'experiment:',
'se' : 'experiment:',
# journal
'journal' : 'journal:',
'j' : 'journal:',
'published_in' : 'journal:',
'spicite' : 'journal:',
'vol' : 'journal:',
# journal page
'journal-page' : '773__c:',
'jp' : '773__c:',
# journal year
'journal-year' : '773__y:',
'jy' : '773__y:',
# key
'key' : '970__a:',
'irn' : '970__a:',
'record' : '970__a:',
'document' : '970__a:',
'documents' : '970__a:',
# keywords
'k' : 'keyword:',
'keywords' : 'keyword:',
# note
'note' : '500__a:',
'n' : '500__a:',
# old title
'old-title' : '246__a:',
'old-t' : '246__a:',
'ex-ti' : '246__a:',
'et' : '246__a:',
# ppf subject
'ppf-subject' : '650__a:',
'ps' : '650__a:',
'scl' : '650__a:',
'status' : '650__a:',
# report number
'r' : 'reportnumber:',
'rn' : 'reportnumber:',
'rept' : 'reportnumber:',
'report' : 'reportnumber:',
'report-num' : 'reportnumber:',
# title
't' : 'title:',
'ti' : 'title:',
'title' : 'title:',
'with-language' : 'title:',
# topic
'topic' : '653__a:',
'tp' : '653__a:',
'hep-topic' : '653__a:',
'desy-keyword' : '653__a:',
'dk' : '653__a:',
# replace all the keywords without match with empty string
# this will remove the noise from the unknown keywrds in the search
# and will in all fields for the words following the keywords
# category
'arx' : '037__c:',
'category' : '037__c:',
# primarch
'parx' : '037__c:',
'primarch' : '037__c:',
# texkey
'texkey' : '035__z:',
# type code
'tc' : '690C_a:',
'ty' : '690C_a:',
'type' : '690C_a:',
'type-code' : '690C_a',
# field code
'f' : '65017a:',
'fc' : '65017a:',
'field' : '65017a:',
'field-code' : '65017a:',
# coden
'bc' : '',
'browse-only-indx' : '',
'coden' : '',
'journal-coden' : '',
# energy
'e' : '',
'energy' : '',
'energyrange-code' : '',
# exact experiment number
'ee' : '',
'exact-exp' : '',
'exact-expno' : '',
# hidden note
'hidden-note' : '',
'hn' : '',
# ppf
'ppf' : '',
'ppflist' : '',
# slac topics
'ppfa' : '',
'slac-topics' : '',
'special-topics' : '',
'stp' : '',
# test index
'test' : '',
'testindex' : '',
}
def __init__(self):
"""Initialize the state of the converter"""
- self._init_monthes()
+ self._init_months()
self._compile_regular_expressions()
def _compile_regular_expressions(self):
"""Compiles some of the regular expressions that are used in the class
for higher performance."""
# regular expression that matches the contents in single and double quotes
# taking in mind if they are escaped.
self._re_quotes_match = re.compile('[^\\\\](".*?[^\\\\]")|[^\\\\](\'.*?[^\\\\]\')')
# for matching cases where kw needs distributing
self._re_distribute_keywords = re.compile(r'\b(?P\S*:)(?P.+?)\s*(?Pand not | and | or | not )\s*(?P[^:]*?)(?= and not | and | or | not |$)', re.IGNORECASE)
# regular expression that matches author patterns
self._re_author_match = re.compile(r'\bauthor:\s*(?P.+?)\s*(?= and not | and | or | not |$)', re.IGNORECASE)
# regular expression that matches exact author patterns
# the group defined in this regular expression is used in method
# _convert_spires_exact_author_search_to_invenio_author_search(...)
# in case of changes correct also the code in this method
self._re_exact_author_match = re.compile(r'\bexactauthor:(?P[^\'\"].*?[^\'\"]\b)(?= and not | and | or | not |$)', re.IGNORECASE)
# regular expression that matches search term, its conent (words that
# are searched)and the operator preceding the term. In case that the
# names of the groups defined in the expression are changed, the
# chagned should be reflected in the code that use it.
self._re_search_term_pattern_match = re.compile(r'\b(?Pfind|and|or|not)\s+(?Ptitle:|keyword:)(?P.*?(\b|\'|\"|\/))(?= and | or | not |$)', re.IGNORECASE)
# regular expression used to split string by white space as separator
self._re_split_pattern = re.compile(r'\s*')
# regular expression matching date after pattern
self._re_date_after_match = re.compile(r'\b(d|date)\b\s*(after|>)\s*(?P\d{4})\b', re.IGNORECASE)
# regular expression matching date after pattern
self._re_date_before_match = re.compile(r'\b(d|date)\b\s*(before|<)\s*(?P\d{4})\b', re.IGNORECASE)
# regular expression matching dates in general
self._re_dates_match = self._compile_dates_regular_expression()
# for finding (and changing) a variety of different SPIRES search keywords
self._re_find_or_fin_at_start = re.compile('^find? .*$')
# patterns for subbing out spaces within quotes temporarily
self._re_pattern_single_quotes = re.compile("'(.*?)'")
self._re_pattern_double_quotes = re.compile("\"(.*?)\"")
self._re_pattern_regexp_quotes = re.compile("\/(.*?)\/")
self._re_pattern_space = re.compile("__SPACE__")
def convert_query(self, query):
"""Converts the query from SPIRES syntax to Invenio syntax
Queries are assumed SPIRES queries only if they start with FIND or FIN"""
# allow users to use "f" only...
query = re.sub('^[fF] ', 'find ', query)
# SPIRES syntax allows searches with 'find' or 'fin'.
if self._re_find_or_fin_at_start.match(query.lower()):
# Everywhere else make the assumption that all and only queries
# starting with 'find' are SPIRES queries. Turn fin into find.
query = re.sub('^[fF][iI][nN] ', 'find ', query)
# these calls are before keywords replacement becuase when keywords
# are replaced, date keyword is replaced by specific field search
# and the DATE keyword is not match in DATE BEFORE or DATE AFTER
query = self._convert_spires_date_before_to_invenio_span_query(query)
query = self._convert_spires_date_after_to_invenio_span_query(query)
# call to _replace_spires_keywords_with_invenio_keywords should be at the
# beginning because the next methods use the result of the replacement
query = self._replace_spires_keywords_with_invenio_keywords(query)
query = self._distribute_keywords_across_combinations(query)
query = self._convert_dates(query)
query = self._convert_spires_author_search_to_invenio_author_search(query)
query = self._convert_spires_exact_author_search_to_invenio_author_search(query)
query = self._convert_spires_truncation_to_invenio_truncation(query)
query = self._expand_search_patterns(query)
# remove FIND in the beginning of the query as it is not necessary in Invenio
query = query[4:]
query = query.strip()
return query
- def _init_monthes(self):
+ def _init_months(self):
"""Defines a dictionary matching the name
of the month with its corresponding number"""
- # this dictionary is used when generating match patterns for monthes
- self._monthes = {'jan':'01', 'january':'01',
+ # this dictionary is used when generating match patterns for months
+ self._months = {'jan':'01', 'january':'01',
'feb':'02', 'february':'02',
'mar':'03', 'march':'03',
'apr':'04', 'april':'04',
'may':'05', 'may':'05',
'jun':'06', 'june':'06',
'jul':'07', 'july':'07',
'aug':'08', 'august':'08',
'sep':'09', 'september':'09',
'oct':'10', 'october':'10',
'nov':'11', 'november':'11',
'dec':'12', 'december':'12'}
# this dicrionary is used to transform name of the month
# to a number used in the date format. By this reason it
# contains also the numbers itself to simplify the conversion
self._month_name_to_month_number = {'1':'01', '01':'01',
'2':'02', '02':'02',
'3':'03', '03':'03',
'4':'04', '04':'04',
'5':'05', '05':'05',
'6':'06', '06':'06',
'7':'07', '07':'07',
'8':'08', '08':'08',
'9':'09', '09':'09',
'10':'10',
'11':'11',
'12':'12',}
- # combine it with monthes in order to cover all the cases
- self._month_name_to_month_number.update(self._monthes)
+ # combine it with months in order to cover all the cases
+ self._month_name_to_month_number.update(self._months)
def _get_month_names_match(self):
"""Retruns part of a patter that matches month in a date"""
- monthes_match = ''
- for month_name in self._monthes.keys():
- monthes_match = monthes_match + month_name + '|'
+ months_match = ''
+ for month_name in self._months.keys():
+ months_match = months_match + month_name + '|'
- monthes_match = r'\b(' + monthes_match[0:-1] + r')\b'
+ months_match = r'\b(' + months_match[0:-1] + r')\b'
- return monthes_match
+ return months_match
def _get_month_number(self, month_name):
"""Returns the corresponding number for a given month
e.g. for February it returns 02"""
return self._month_name_to_month_number[month_name.lower()]
def _compile_dates_regular_expression(self):
""" Returns compiled regular expression matching dates in general that follow particular keywords"""
date_preceding_terms_match = r'\b((?<=' + \
self._DATE_ADDED_FIELD +')|(?<=' + \
self._DATE_UPDATED_FIELD + ')|(?<=' + \
self._DATE_FIELD +'))'
day_match = r'\b(0?[1-9]|[12][0-9]|3[01])\b'
month_match = r'\b(0?[1-9]|1[012])\b'
month_names_match = self._get_month_names_match()
year_match = r'\b([1-9][0-9]?)?[0-9]{2}\b'
date_separator_match = r'(\s+|\s*[,/\-\.]\s*)'
dates_re = date_preceding_terms_match + r'\s*' + \
r'(((((?P' + day_match + r')' + date_separator_match + ')?' + \
r'(?P' + month_match + r'|' + month_names_match + r'))' + \
r'|' + \
r'((?P' + month_names_match + r')' + date_separator_match + \
r'(?P' + day_match + r')))' + \
date_separator_match + r')?' + \
r'(?P' + year_match + r')'
return re.compile(dates_re, re.IGNORECASE)
def _convert_dates(self, query):
"""Converts dates in the query in format expected from invenio"""
def create_replacement_pattern(match):
"""method used for replacement with regular expression"""
QUOTES = '"'
# retrieve the year
year = match.group('year')
# in case only last two digits are provided, consider it is 19xx
if len(year) == 2:
year = '19' + year
# retrieve the month
month_name = match.group('month')
if None == month_name:
month_name = match.group('month2')
# if there is no month, look for everything in given year
if None == month_name:
return QUOTES + year + '*' + QUOTES
month = self._get_month_number(month_name)
# retrieve the day
day = match.group('day')
if None == day:
day = match.group('day2')
# if day is missing, look for everything in geven year and month
if None == day:
return QUOTES + year + '-' + month + '-*' + QUOTES
if len(day) == 1:
day = '0'+day
return QUOTES + year + '-' + month + '-' + day + QUOTES
query = self._re_dates_match.sub(create_replacement_pattern, query)
return query
def _convert_spires_date_after_to_invenio_span_query(self, query):
"""Converts date after SPIRES search term into invenio span query"""
def create_replacement_pattern(match):
"""method used for replacement with regular expression"""
return 'year:' + match.group('year') + '->9999'
query = self._re_date_after_match.sub(create_replacement_pattern, query)
return query
def _convert_spires_date_before_to_invenio_span_query(self, query):
"""Converts date before SPIRES search term into invenio span query"""
# method used for replacement with regular expression
def create_replacement_pattern(match):
return 'year:' + '0->' + match.group('year')
query = self._re_date_before_match.sub(create_replacement_pattern, query)
return query
def _expand_search_patterns(self, query):
"""Expands search queries.
If a search term is followed by several words e.g.
author: ellis or title:THESE THREE WORDS it is exoanded to
author:ellis or (title: THESE and title:THREE...)
For a combining operator "and" is used though FIXME this is not
correct, it should really be calculated by boolean expansion of parens.
Not all the search terms are expanded this way, but only a short
list of them"""
def create_replacement_pattern(match):
result = ''
search_term = match.group('search_term')
combine_operator = match.group('combine_operator')
search_content = match.group('search_content').strip()
# replace spaces within quotes by __SPACE__ temporarily:
search_content = self._re_pattern_single_quotes.sub(lambda x: "'"+string.replace(x.group(1), ' ', '__SPACE__')+"'", search_content)
search_content = self._re_pattern_double_quotes.sub(lambda x: "\""+string.replace(x.group(1), ' ', '__SPACE__')+"\"", search_content)
search_content = self._re_pattern_regexp_quotes.sub(lambda x: "/"+string.replace(x.group(1), ' ', '__SPACE__')+"/", search_content)
words = self._re_split_pattern.split(search_content)
if len(words) > 1:
#FIXME this will break if it happens to be nested.
result = combine_operator + ' (' + search_term + words[0]
for word in words[1:]:
result = result + ' and ' + search_term + word
result = result + ') '
else:
result = combine_operator + ' ' + search_term + words[0]
# replace back __SPACE__ by spaces:
result = self._re_pattern_space.sub(" ", result)
return result.strip()
query = self._re_search_term_pattern_match.sub(create_replacement_pattern, query)
return query
def _convert_spires_truncation_to_invenio_truncation(self, query):
"""Replace SPIRES truncation symbol # with invenio trancation symbol *"""
return query.replace('#', '*')
def _convert_spires_exact_author_search_to_invenio_author_search(self, query):
"""Converts SPIRES search patterns for exact author into search pattern
for invenio"""
# method used for replacement with regular expression
def create_replacement_pattern(match):
# the regular expression where this group name is defined is in
# the method _compile_regular_expressions()
return self._EA_TAG + '"' + match.group('author_name') + '"'
query = self._re_exact_author_match.sub(create_replacement_pattern, query)
return query
def _convert_spires_author_search_to_invenio_author_search(self, query):
"""Converts SPIRES search patterns for authors to search patterns in invenio
that give similar results to the spires search.
"""
# result of the replacement
result = ''
current_position = 0
for match in self._re_author_match.finditer(query):
result += query[current_position : match.start() ]
scanned_name = NameScanner.scan(match.group('name'))
author_atoms = self._create_author_search_pattern_from_fuzzy_name_dict(scanned_name)
if author_atoms.find(' ') == -1:
result += author_atoms + ' '
else:
result += '(' + author_atoms + ') '
current_position = match.end()
result += query[current_position : len(query)]
return result
def _create_author_search_pattern_from_fuzzy_name_dict(self, fuzzy_name):
"""Creates an invenio search pattern for an author from a fuzzy name dict"""
author_name = ''
author_middle_name = ''
author_surname = ''
if len(fuzzy_name['nonlastnames']) > 0:
author_name = fuzzy_name['nonlastnames'][0]
if len(fuzzy_name['nonlastnames']) == 2:
author_middle_name = fuzzy_name['nonlastnames'][1]
if len(fuzzy_name['nonlastnames']) > 2:
author_middle_name = ' '.join(fuzzy_name['nonlastnames'][1:])
author_surname = ' '.join(fuzzy_name['lastnames'])
NAME_IS_INITIAL = (len(author_name) == 1)
NAME_IS_NOT_INITIAL = not NAME_IS_INITIAL
# we expect to have at least surname
if author_surname == '' or author_surname == None:
return ''
# ellis ---> "author:ellis"
#if author_name == '' or author_name == None:
if not author_name:
return self._A_TAG + author_surname
# ellis, j ---> "ellis, j*"
if NAME_IS_INITIAL and not author_middle_name:
return self._A_TAG + '"' + author_surname + ', ' + author_name + '*"'
# if there is middle name we expect to have also name and surname
# ellis, j. r. ---> ellis, j* r*
# j r ellis ---> ellis, j* r*
# ellis, john r. ---> ellis, j* r* or ellis, j. r. or ellis, jo. r.
# ellis, john r. ---> author:ellis, j* r* or exactauthor:ellis, j r or exactauthor:ellis jo r
if author_middle_name:
search_pattern = self._A_TAG + '"' + author_surname + ', ' + author_name + '*' + ' ' + author_middle_name.replace(" ","* ") + '*"'
if NAME_IS_NOT_INITIAL:
for i in range(1,len(author_name)):
search_pattern += ' or ' + self._EA_TAG + "\"%s, %s %s\"" % (author_surname, author_name[0:i], author_middle_name)
return search_pattern
# ellis, jacqueline ---> "ellis, jacqueline" or "ellis, j.*" or "ellis, j" or "ellis, ja.*" or "ellis, ja" or "ellis, jacqueline *"
# in case we don't use SPIRES data, the ending dot is ommited.
search_pattern = self._A_TAG + '"' + author_surname + ', ' + author_name + '*"'
if NAME_IS_NOT_INITIAL:
for i in range(1,len(author_name)):
search_pattern += ' or ' + self._EA_TAG + "\"%s, %s\"" % (author_surname, author_name[0:i])
return search_pattern
def _replace_spires_keywords_with_invenio_keywords(self, query):
"""Replaces SPIRES keywords that have directly
corresponding Invenio keywords
Replacements are done only in content that is not in quotes."""
# result of the replacement
result = ""
current_position = 0
for match in self._re_quotes_match.finditer(query):
# clean the content after the previous quotes and before current quotes
cleanable_content = query[current_position : match.start()]
cleanable_content = self._replace_all_spires_keywords_in_string(cleanable_content)
# get the content in the quotes (group one matches double
# quotes, group 2 singles)
if match.group(1):
quoted_content = match.group(1)
elif match.group(2):
quoted_content = match.group(2)
# append the processed content to the result
result = result + cleanable_content + quoted_content
# move current position at the end of the processed content
current_position = match.end()
# clean the content from the last appearance of quotes till the end of the query
cleanable_content = query[current_position : len(query)]
cleanable_content = self._replace_all_spires_keywords_in_string(cleanable_content)
result = result + cleanable_content
return result
def _replace_all_spires_keywords_in_string(self, query):
"""Replaces all SPIRES keywords in the string with their
corresponding Invenio keywords"""
for spires_keyword, invenio_keyword in self._SPIRES_TO_INVENIO_KEYWORDS_MATCHINGS.iteritems():
query = self._replace_keyword(query, spires_keyword,\
invenio_keyword)
return query
def _replace_keyword(self, query, old_keyword, new_keyword):
"""Replaces old keyword in the query with a new keyword"""
# perform case insensitive replacement with regular expression
regex_string = r'\b(?P(find|and|or|not)\b[\s\(]*)' + \
old_keyword + r'(?P[\s\(]+|$)'
regular_expression = re.compile(regex_string, re.IGNORECASE)
result = regular_expression.sub(r'\g' + new_keyword + r'\g', query)
result = re.sub(':\s+', ':', result)
return result
def _distribute_keywords_across_combinations(self, query):
# method used for replacement with regular expression
def create_replacement_pattern(match):
# the regular expression where this group name is defined is in
# the method _compile_regular_expressions()
return match.group('keyword') + ' ' + match.group('content') + \
' ' + match.group('combination') + ' ' + match.group('keyword') + ' ' + match.group('last_content')
query = self._re_distribute_keywords.sub(create_replacement_pattern, query)
return query