# -*- coding: utf-8 -*-
##
## This file is part of CDS Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## CDS Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""
Various utilities for WebJournal, e.g. config parser, etc.
"""
import time
import datetime
import calendar
import re
import os
import cPickle
import math
import urllib
from MySQLdb import OperationalError
from xml.dom import minidom
from urlparse import urlparse

from invenio.config import \
     CFG_ETCDIR, \
     CFG_SITE_URL, \
     CFG_CACHEDIR, \
     CFG_SITE_LANG, \
     CFG_ACCESS_CONTROL_LEVEL_SITE, \
     CFG_SITE_SUPPORT_EMAIL
from invenio.dbquery import run_sql
from invenio.bibformat_engine import BibFormatObject
from invenio.search_engine import search_pattern
from invenio.messages import gettext_set_language
from invenio.errorlib import register_exception

########################### REGULAR EXPRESSIONS ######################
header_pattern = re.compile('<p\s*(align=justify)??>\s*<strong>(?P<header>.*?)</strong>\s*</p>')
header_pattern2 = re.compile('<p\s*(class="articleHeader").*?>(?P<header>.*?)</p>')
para_pattern = re.compile('<p.*?>(?P<paragraph>.+?)</p>', re.DOTALL)
img_pattern = re.compile('<img.*?src=("|\')?(?P<image>\S+?)("|\'|\s).*?/>', re.DOTALL)
image_pattern = re.compile(r'''
    (<a\s*href=["']?(?P<hyperlink>\S*)["']?>)?# get the link location for the image
    \s*# after each tag we can have arbitrary whitespaces
    <center># the image is always centered
    \s*
    <img\s*(class=["']imageScale["'])*?\s*src=(?P<image>\S*)\s*border=1\s*(/)?># getting the image itself
    \s*
    </center>
    \s*
    (</a>)?
    (<br />|<br />|<br/>)*# the caption can be separated by any nr of line breaks
    (
    <b>
    \s*
    <i>
    \s*
    <center>(?P<caption>.*?)</center># getting the caption
    \s*
    </i>
    \s*
    </b>
    )?''', re.DOTALL | re.VERBOSE | re.IGNORECASE)
#'

############################## FEATURED RECORDS ######################
def get_featured_records(journal_name):
    """
    Returns the 'featured' records, i.e. records chosen to be displayed
    with an image on the main page, in the widgets section, for the
    given journal.

    parameter:
        journal_name - (str) the name of the journal for which we want
                             to get the featured records
    returns:
        list of tuples (recid, img_url)
    """
    try:
        feature_file = open('%s/webjournal/%s/featured_record' % \
                            (CFG_ETCDIR, journal_name))
    except:
        return []
    records = feature_file.readlines()
    return [(record.split('---', 1)[0], record.split('---', 1)[1]) \
            for record in records if "---" in record]

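# A minimal sketch of the expected file format (the journal name
# 'AtlantisTimes' and URL below are hypothetical): each line of
# CFG_ETCDIR/webjournal/AtlantisTimes/featured_record holds a recid and
# an image URL separated by '---', e.g.:
#
#     2390---http://localhost/img/featured.png
#
# for which get_featured_records('AtlantisTimes') would return
# [('2390', 'http://localhost/img/featured.png\n')] (note that the
# trailing newline of each line is kept in the img_url part).
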
def add_featured_record(journal_name, recid, img_url):
    """
    Adds the given record to the list of featured records of the given
    journal.

    parameters:
        journal_name - (str) the name of the journal to which the record
                             should be added.
        recid        - (int) the record id of the record to be featured.
        img_url      - (str) a url to an image icon displayed along the
                             featured record.
    returns:
        0 if everything went ok
        1 if record is already in the list
        2 if other problems
    """
    # Check that record is not already there
    featured_records = get_featured_records(journal_name)
    for featured_recid, featured_img in featured_records:
        if featured_recid == str(recid):
            return 1
    try:
        fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR,
                                                          journal_name),
                    "a")
        fptr.write(str(recid) + '---' + img_url + '\n')
        fptr.close()
    except:
        return 2
    return 0

def remove_featured_record(journal_name, recid):
    """
    Removes the given record from the list of featured records of the
    given journal.

    parameters:
        journal_name - (str) the name of the journal from which the
                             record should be removed.
        recid        - (int) the record id of the record to be removed.
    """
    featured_records = get_featured_records(journal_name)
    try:
        fptr = open('%s/webjournal/%s/featured_record' % (CFG_ETCDIR,
                                                          journal_name),
                    "w")
        for featured_recid, featured_img in featured_records:
            if str(featured_recid) != str(recid):
                fptr.write(str(featured_recid) + '---' + featured_img + \
                           '\n')
        fptr.close()
    except:
        return 1
    return 0

############################ ARTICLES RELATED ########################
def get_order_dict_from_recid_list(recids, journal_name, issue_number,
                                   newest_first=False, newest_only=False):
    """
    Returns the ordered list of input recids, for given 'issue_number'.

    Since there might be several articles at the same position, the
    returned structure is a dictionary with keys being order number
    indicated in record metadata, and values being list of recids for
    this order number (recids for one position are ordered from
    highest to lowest recid).

    Eg: {'1': [2390, 2386, 2385],
         '3': [2388],
         '2': [2389],
         '4': [2387]}

    Parameters:

            recids - a list of all recid's that should be brought
                     into order

      journal_name - the name of the journal

      issue_number - *str* the issue_number for which we are
                     deriving the order

      newest_first - *bool* if True, new articles should be placed
                     at beginning of the list. If so, their
                     position/order will be negative integers

       newest_only - *bool* if only new articles should be returned

    Returns:

        ordered_records: a dictionary with the recids ordered by
                         keys
    """
    ordered_records = {}
    ordered_new_records = {}
    records_without_defined_order = []
    new_records_without_defined_order = []

    for record in recids:
        temp_rec = BibFormatObject(record)
        articles_info = temp_rec.fields('773__')
        for article_info in articles_info:
            if article_info.get('n', '') == issue_number or \
                   '0' + article_info.get('n', '') == issue_number:
                if article_info.has_key('c') and \
                       article_info['c'].isdigit():
                    order_number = int(article_info.get('c', ''))
                    if (newest_first or newest_only) and \
                           is_new_article(journal_name, issue_number, record):
                        if ordered_new_records.has_key(order_number):
                            ordered_new_records[order_number].append(record)
                        else:
                            ordered_new_records[order_number] = [record]
                    elif not newest_only:
                        if ordered_records.has_key(order_number):
                            ordered_records[order_number].append(record)
                        else:
                            ordered_records[order_number] = [record]
                else:
                    # No order? No problem! Append it at the end.
                    if newest_first and is_new_article(journal_name, issue_number, record):
                        new_records_without_defined_order.append(record)
                    elif not newest_only:
                        records_without_defined_order.append(record)

    # Append records without order at the end of the list
    if records_without_defined_order:
        if ordered_records:
            ordered_records[max(ordered_records.keys()) + 1] = records_without_defined_order
        else:
            ordered_records[1] = records_without_defined_order

    # Append new records without order at the end of the list of new
    # records
    if new_records_without_defined_order:
        if ordered_new_records:
            ordered_new_records[max(ordered_new_records.keys()) + 1] = new_records_without_defined_order
        else:
            ordered_new_records[1] = new_records_without_defined_order

    # Append new records at the beginning of the list of 'old'
    # records. To do so, use negative integers
    if ordered_new_records:
        highest_new_record_order = max(ordered_new_records.keys())
        for order, new_records in ordered_new_records.iteritems():
            ordered_records[-highest_new_record_order + order - 1] = new_records

    for (order, records) in ordered_records.iteritems():
        # Reverse so that if there are several articles at the same
        # position, newest appear first
        records.reverse()

    return ordered_records

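# Illustrative call (hypothetical values, assuming records 2385-2390
# carry 773__n == '03/2009' and suitable 773__c order subfields):
#
#     get_order_dict_from_recid_list([2385, 2386, 2387, 2388, 2389, 2390],
#                                    'AtlantisTimes', '03/2009')
#     -> {1: [2390, 2386, 2385], 2: [2389], 3: [2388], 4: [2387]}
#
# (keys are ints, since 773__c is converted with int()). With
# newest_first=True, records detected by is_new_article() would instead
# appear under negative keys (e.g. -2, -1), so that callers iterating
# over sorted keys show them first.
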
def get_journal_articles(journal_name, issue, category,
                         newest_first=False, newest_only=False):
    """
    Returns the recids in given category and journal, for given issue
    number. The returned recids are grouped according to their 773__c
    field.

    Example of returned value:
               {'1': [2390, 2386, 2385],
                '3': [2388],
                '2': [2389],
                '4': [2387]}

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)

              issue - *str* the issue. Eg: "08/2007"

           category - *str* the name of the category

       newest_first - *bool* if True, new articles should be placed
                      at beginning of the list. If so, their
                      position/order will be negative integers

        newest_only - *bool* if only new articles should be returned
    """
    use_cache = True
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    if issue_is_later_than(issue, current_issue):
        # If we are working on an unreleased issue, do not use the
        # caching mechanism
        use_cache = False
    if use_cache:
        cached_articles = _get_cached_journal_articles(journal_name, issue, category)
        if cached_articles is not None:
            ordered_articles = get_order_dict_from_recid_list(cached_articles,
                                                              journal_name,
                                                              issue,
                                                              newest_first,
                                                              newest_only)
            return ordered_articles

    # Retrieve the list of rules that map Category -> Search Pattern.
    # Keep only the rule matching our category
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    category_to_search_pattern_rules = config_strings["record/rule"]
    try:
        matching_rule = [rule.split(',', 1) for rule in \
                         category_to_search_pattern_rules \
                         if rule.split(',')[0] == category]
    except:
        return []

    recids_issue = search_pattern(p='773__n:%s' % issue)
    recids_rule = search_pattern(p=matching_rule[0][1])
    if issue[0] == '0':
        # search for 09/ and 9/
        recids_issue.union_update(search_pattern(p='773__n:%s' % issue.lstrip('0')))

    recids_rule.intersection_update(recids_issue)
    recids = list(recids_rule)
    if use_cache:
        _cache_journal_articles(journal_name, issue, category, recids)
    ordered_articles = get_order_dict_from_recid_list(recids,
                                                      journal_name,
                                                      issue,
                                                      newest_first,
                                                      newest_only)

    return ordered_articles

def _cache_journal_articles(journal_name, issue, category, articles):
    """
    Caches given articles IDs.
    """
    journal_cache_path = get_journal_article_cache_path(journal_name,
                                                        issue)
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        journal_info = cPickle.load(journal_cache_file)
        journal_cache_file.close()
    except cPickle.PickleError, e:
        journal_info = {}
    except IOError:
        journal_info = {}
    except EOFError:
        journal_info = {}

    if not journal_info.has_key('journal_articles'):
        journal_info['journal_articles'] = {}
    journal_info['journal_articles'][category] = articles

    # Create cache directory if it does not exist
    journal_cache_dir = os.path.dirname(journal_cache_path)
    if not os.path.exists(journal_cache_dir):
        try:
            os.makedirs(journal_cache_dir)
        except:
            return False

    journal_cache_file = open(journal_cache_path, 'w')
    cPickle.dump(journal_info, journal_cache_file)
    journal_cache_file.close()
    return True

def _get_cached_journal_articles(journal_name, issue, category):
    """
    Retrieve the articles IDs cached for this journal.

    Returns None if the cache does not exist or is more than 5 minutes
    old.
    """
    # Check if our cache is more or less up-to-date (not more than 5
    # minutes old)
    try:
        journal_cache_path = get_journal_article_cache_path(journal_name,
                                                            issue)
        last_update = os.path.getctime(journal_cache_path)
    except Exception, e:
        return None
    now = time.time()
    if (last_update + 5*60) < now:
        return None

    # Get from cache
    try:
        journal_cache_file = open(journal_cache_path, 'r')
        journal_info = cPickle.load(journal_cache_file)
        journal_articles = journal_info.get('journal_articles', {}).get(category, None)
        journal_cache_file.close()
    except cPickle.PickleError, e:
        journal_articles = None
    except IOError:
        journal_articles = None
    except EOFError:
        journal_articles = None

    return journal_articles

def is_new_article(journal_name, issue, recid):
    """
    Check if the given article should be considered as new or not.

    New articles are articles that have never appeared in issues older
    than the given one.
    """
    article_found_in_older_issue = False
    temp_rec = BibFormatObject(recid)
    publication_blocks = temp_rec.fields('773__')

    for publication_block in publication_blocks:
        this_issue_number, this_issue_year = issue.split('/')
        issue_number, issue_year = publication_block.get('n', '/').split('/', 1)

        if int(issue_year) < int(this_issue_year):
            # Found an older issue
            article_found_in_older_issue = True
            break
        elif int(issue_year) == int(this_issue_year) and \
                 int(issue_number) < int(this_issue_number):
            # Found an older issue
            article_found_in_older_issue = True
            break

    return not article_found_in_older_issue

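# For instance (hypothetical metadata): a record whose 773__ blocks
# contain both n='51/2008' and n='02/2009' is *not* new for issue
# '02/2009', since it already appeared in the older issue 51/2008; a
# record whose only block is n='02/2009' is new for that issue.
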
############################ CATEGORIES RELATED ######################
def get_journal_categories(journal_name, issue=None):
    """
    List the categories for the given journal and issue.

    Returns categories in same order as in config file.

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)

              issue - *str* the issue. Eg: '08/2007'. If None, consider
                      all categories defined in the journal config
    """
    categories = []
    current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    all_categories = [rule.split(',')[0] for rule in \
                      config_strings["record/rule"]]

    if issue is None:
        return all_categories

    for category in all_categories:
        recids = get_journal_articles(journal_name,
                                      issue,
                                      category)
        if len(recids.keys()) > 0:
            categories.append(category)

    return categories

def get_category_query(journal_name, category):
    """
    Returns the category definition for the given category and journal
    name.

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)

           category - *str* a category name, as found in the XML config
    """
    config_strings = get_xml_from_config(["record/rule"], journal_name)
    category_to_search_pattern_rules = config_strings["record/rule"]

    try:
        matching_rule = [rule.split(',', 1)[1].strip() for rule in \
                         category_to_search_pattern_rules \
                         if rule.split(',')[0] == category]
    except:
        return None

    return matching_rule[0]

######################### JOURNAL CONFIG VARS ######################
cached_parsed_xml_config = {}

def get_xml_from_config(nodes, journal_name):
    """
    Returns values from the journal configuration file.

    The needed values can be specified by node name, or by a hierarchy
    of node names using '/' as character to mean 'descendant of'.
    Eg. 'record/rule' to get all the values of 'rule' tags inside the
    'record' node.

    Returns a dictionary with a key for each query and a list of
    strings (innerXml) results for each key.
    Has a special field "config_fetching_error" that returns an error
    when something has gone wrong.
    """
    # Get and open the config file
    results = {}
    if cached_parsed_xml_config.has_key(journal_name):
        config_file = cached_parsed_xml_config[journal_name]
    else:
        config_path = '%s/webjournal/%s/%s-config.xml' % \
                      (CFG_ETCDIR, journal_name, journal_name)
        config_file = minidom.Document
        try:
            config_file = minidom.parse("%s" % config_path)
        except:
            # todo: raise exception "error: no config file found"
            results["config_fetching_error"] = "could not find config file"
            return results
        else:
            cached_parsed_xml_config[journal_name] = config_file

    for node_path in nodes:
        node = config_file
        for node_path_component in node_path.split('/'):
            # pylint: disable=E1103
            # The node variable can be rewritten in the loop and therefore
            # its type can change.
            if node != config_file and node.length > 0:
                # We have a NodeList object: consider only first child
                node = node.item(0)
            # pylint: enable=E1103
            try:
                node = node.getElementsByTagName(node_path_component)
            except:
                # WARNING, config did not have such value
                node = []
                break

        results[node_path] = []
        for result in node:
            try:
                result_string = result.firstChild.toxml(encoding="utf-8")
            except:
                # WARNING, config did not have such value
                continue
            results[node_path].append(result_string)

    return results

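# Sketch of a matching query (the config snippet below is hypothetical,
# but follows the 'record/rule' hierarchy queried elsewhere in this
# module). Given CFG_ETCDIR/webjournal/AtlantisTimes/AtlantisTimes-config.xml
# containing:
#
#     <webjournal name="AtlantisTimes">
#       <record>
#         <rule>News, 980__a:NEWS</rule>
#         <rule>Science, 980__a:SCIENCE</rule>
#       </record>
#     </webjournal>
#
# get_xml_from_config(["record/rule"], 'AtlantisTimes') would return
# {'record/rule': ['News, 980__a:NEWS', 'Science, 980__a:SCIENCE']}.
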
def get_journal_issue_field(journal_name):
    """
    Returns the MARC field in which this journal expects to find
    the issue number. Read this from the journal config file.

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)
    """
    config_strings = get_xml_from_config(["issue_number"], journal_name)

    try:
        issue_field = config_strings["issue_number"][0]
    except:
        issue_field = '773__n'

    return issue_field

def get_journal_css_url(journal_name, type='screen'):
    """
    Returns URL to this journal's CSS.

    Parameters:

       journal_name - *str* the name of the journal (as used in URLs)

               type - *str* 'screen' or 'print', depending on the kind
                      of CSS
    """
    config_strings = get_xml_from_config([type], journal_name)
    css_path = ''
    try:
        css_path = config_strings[type][0]
    except Exception:
        register_exception(req=None,
                           suffix="No css file for journal %s. Is this right?" % \
                           journal_name)
    return CFG_SITE_URL + '/' + css_path

def get_journal_submission_params(journal_name):
    """
    Returns the (doctype, identifier element, identifier field) for
    the submission of articles in this journal, so that it is possible
    to build direct submission links.

    Parameter:

       journal_name - *str* the name of the journal (as used in URLs)
    """
    doctype = ''
    identifier_field = ''
    identifier_element = ''

    config_strings = get_xml_from_config(["submission/doctype"], journal_name)
    if config_strings.get('submission/doctype', ''):
        doctype = config_strings['submission/doctype'][0]

    config_strings = get_xml_from_config(["submission/identifier_element"], journal_name)
    if config_strings.get('submission/identifier_element', ''):
        identifier_element = config_strings['submission/identifier_element'][0]

    config_strings = get_xml_from_config(["submission/identifier_field"], journal_name)
    if config_strings.get('submission/identifier_field', ''):
        identifier_field = config_strings['submission/identifier_field'][0]
    else:
        identifier_field = '037__a'

    return (doctype, identifier_element, identifier_field)

def get_journal_draft_keyword_to_remove(journal_name):
    """
    Returns the keyword that should be removed from the article
    metadata in order to move the article from Draft to Ready.
    """
    config_strings = get_xml_from_config(["draft_keyword"], journal_name)
    if config_strings.get('draft_keyword', ''):
        return config_strings['draft_keyword'][0]
    return ''

def get_journal_alert_sender_email(journal_name):
    """
    Returns the email address that should be used as sender of the
    alert email.
    If not specified, use CFG_SITE_SUPPORT_EMAIL.
    """
    config_strings = get_xml_from_config(["alert_sender"], journal_name)
    if config_strings.get('alert_sender', ''):
        return config_strings['alert_sender'][0]
    return CFG_SITE_SUPPORT_EMAIL

def get_journal_alert_recipient_email(journal_name):
    """
    Returns the default email addresses of the recipients of the alert
    email, as a string of comma-separated emails.
    """
    config_strings = get_xml_from_config(["alert_recipients"], journal_name)
    if config_strings.get('alert_recipients', ''):
        return config_strings['alert_recipients'][0]
    return ''

def get_journal_template(template, journal_name, ln=CFG_SITE_LANG):
    """
    Returns the journal template name for the given template type.

    Raises an exception if the template cannot be found.
    """
    from invenio.webjournal_config import \
         InvenioWebJournalTemplateNotFoundError

    config_strings = get_xml_from_config([template], journal_name)
    try:
        index_page_template = 'webjournal' + os.sep + \
                              config_strings[template][0]
    except:
        raise InvenioWebJournalTemplateNotFoundError(ln,
                                                     journal_name,
                                                     template)

    return index_page_template

def get_journal_name_intl(journal_name, ln=CFG_SITE_LANG):
    """
    Returns the nice name of the journal, translated if possible.
    """
    _ = gettext_set_language(ln)

    config_strings = get_xml_from_config(["niceName"], journal_name)
    if config_strings.get('niceName', ''):
        return _(config_strings['niceName'][0])
    return ''

def get_journal_languages(journal_name):
    """
    Returns the list of languages defined for this journal.
    """
    config_strings = get_xml_from_config(["languages"], journal_name)
    if config_strings.get('languages', ''):
        return [ln.strip() for ln in \
                config_strings['languages'][0].split(',')]
    return []

def get_journal_issue_grouping(journal_name):
    """
    Returns the number of issues that are typically released at the
    same time.

    This is used if, every two weeks, you release an issue that should
    contain the issues of the next two weeks (eg. at week 16, you
    release an issue named '16-17/2009').

    This number should help the admin interface to guess how to
    release the next issue (can be overridden by the user).
    """
    config_strings = get_xml_from_config(["issue_grouping"], journal_name)
    if config_strings.get('issue_grouping', ''):
        issue_grouping = config_strings['issue_grouping'][0]
        if issue_grouping.isdigit() and int(issue_grouping) > 0:
            return int(issue_grouping)
    return 1

def get_journal_nb_issues_per_year(journal_name):
    """
    Returns the default number of issues per year for this journal.

    This number should help the admin interface to guess the next
    issue number (can be overridden by the user).
    """
    config_strings = get_xml_from_config(["issues_per_year"], journal_name)
    if config_strings.get('issues_per_year', ''):
        issues_per_year = config_strings['issues_per_year'][0]
        if issues_per_year.isdigit() and int(issues_per_year) > 0:
            return int(issues_per_year)
    return 52

def get_journal_preferred_language(journal_name, ln):
    """
    Returns the most adequate language to display the journal, given a
    language.
    """
    languages = get_journal_languages(journal_name)
    if ln in languages:
        return ln
    elif CFG_SITE_LANG in languages:
        return CFG_SITE_LANG
    elif languages:
        return languages[0]
    else:
        return CFG_SITE_LANG

def get_unreleased_issue_hiding_mode(journal_name):
    """
    Returns how unreleased issues should be treated. Can be one of the
    following string values:

         'future' - only future unreleased issues are hidden. Past
                    unreleased ones can be viewed

            'all' - any unreleased issue (past and future) has to be
                    hidden

           'none' - no unreleased issue is hidden
    """
    config_strings = get_xml_from_config(["hide_unreleased_issues"], journal_name)
    if config_strings.get('hide_unreleased_issues', ''):
        hide_unreleased_issues = config_strings['hide_unreleased_issues'][0]
        if hide_unreleased_issues in ['future', 'all', 'none']:
            return hide_unreleased_issues
    return 'all'

def get_first_issue_from_config(journal_name):
    """
    Returns the first issue as defined in the config. This should only
    be useful when no issue has been released yet.

    If not specified, returns the issue made of the current week number
    and year.
    """
    config_strings = get_xml_from_config(["first_issue"], journal_name)
    if config_strings.has_key('first_issue'):
        return config_strings['first_issue'][0]
    return time.strftime("%W/%Y", time.localtime())

######################## TIME / ISSUE FUNCTIONS ######################
def get_current_issue(ln, journal_name):
    """
    Returns the current issue of a journal as a string.
    The current issue is the latest released issue.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        current_issue = run_sql("""SELECT issue_number
                                     FROM jrnISSUE
                                    WHERE date_released <= NOW()
                                      AND id_jrnJOURNAL=%s
                                 ORDER BY date_released DESC
                                    LIMIT 1""",
                                (journal_id,))[0][0]
    except:
        # start the first journal ever
        current_issue = get_first_issue_from_config(journal_name)
        run_sql("""INSERT INTO jrnISSUE (id_jrnJOURNAL, issue_number, issue_display)
                        VALUES(%s, %s, %s)""",
                (journal_id,
                 current_issue,
                 current_issue))
    return current_issue

def get_all_released_issues(journal_name):
    """
    Returns the list of released issues, ordered by release date.

    Note that it only includes the issues that are considered as
    released in the DB: it will not, for example, include articles that
    have been imported into the system but not released.
    """
    journal_id = get_journal_id(journal_name)
    res = run_sql("""SELECT issue_number
                       FROM jrnISSUE
                      WHERE id_jrnJOURNAL = %s
                        AND UNIX_TIMESTAMP(date_released) != 0
                   ORDER BY date_released DESC""",
                  (journal_id,))
    if res:
        return [row[0] for row in res]
    else:
        return []

def get_next_journal_issues(current_issue_number, journal_name, n=2):
    """
    This function suggests the 'n' next issue numbers.
    """
    number, year = current_issue_number.split('/', 1)
    number = int(number)
    year = int(year)
    number_issues_per_year = get_journal_nb_issues_per_year(journal_name)
    next_issues = [make_issue_number(journal_name,
                                     ((number - 1 + i) % (number_issues_per_year)) + 1,
                                     year + ((number - 1 + i) / number_issues_per_year)) \
                   for i in range(1, n + 1)]

    return next_issues

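# Example (hypothetical journal, assuming the default of 52 issues per
# year):
#
#     get_next_journal_issues('51/2008', 'AtlantisTimes', n=3)
#     -> ['52/2008', '01/2009', '02/2009']
#
# i.e. the modulo arithmetic above wraps the issue number around the
# end of the year and increments the year accordingly.
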
def get_grouped_issues(journal_name, issue_number):
    """
    Returns all the issues grouped with a given one.

    Issues are sorted from the oldest to the newest one.
    """
    grouped_issues = []
    journal_id = get_journal_id(journal_name, CFG_SITE_LANG)
    issue_display = get_issue_number_display(issue_number, journal_name)
    res = run_sql("""SELECT issue_number
                       FROM jrnISSUE
                      WHERE id_jrnJOURNAL=%s AND issue_display=%s""",
                  (journal_id, issue_display))
    if res:
        grouped_issues = [row[0] for row in res]
        grouped_issues.sort(compare_issues)

    return grouped_issues

def compare_issues(issue1, issue2):
    """
    Comparison function for issues.

    Returns:
        -1 if issue1 is older than issue2
         0 if issues are equal
         1 if issue1 is newer than issue2
    """
    issue1_number, issue1_year = issue1.split('/', 1)
    issue2_number, issue2_year = issue2.split('/', 1)

    if int(issue1_year) == int(issue2_year):
        return cmp(int(issue1_number), int(issue2_number))
    else:
        return cmp(int(issue1_year), int(issue2_year))

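# Examples (issue strings are '<number>/<year>'):
#
#     compare_issues('02/2009', '52/2008') ->  1   (2009 is later)
#     compare_issues('16/2009', '17/2009') -> -1
#     compare_issues('16/2009', '16/2009') ->  0
#
# This makes the function directly usable as the cmp argument of
# list.sort(), as done in get_grouped_issues() above.
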
def issue_is_later_than(issue1, issue2):
    """
    Returns True if issue1 is later than issue2.
    """
    issue_number1, issue_year1 = issue1.split('/', 1)
    issue_number2, issue_year2 = issue2.split('/', 1)

    if int(issue_year1) > int(issue_year2):
        return True
    elif int(issue_year1) == int(issue_year2):
        return int(issue_number1) > int(issue_number2)
    else:
        return False

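# E.g. issue_is_later_than('02/2009', '52/2008') -> True, while
# issue_is_later_than('16/2009', '17/2009') -> False.
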
def get_issue_number_display(issue_number, journal_name,
                             ln=CFG_SITE_LANG):
    """
    Returns the display string for a given issue number.
    """
    journal_id = get_journal_id(journal_name, ln)
    issue_display = run_sql("""SELECT issue_display
                                 FROM jrnISSUE
                                WHERE issue_number=%s
                                  AND id_jrnJOURNAL=%s""",
                            (issue_number, journal_id))
    if issue_display:
        return issue_display[0][0]
    else:
        # Not yet released...
        return issue_number

def make_issue_number(journal_name, number, year, for_url_p=False):
    """
    Creates a normalized issue number representation with given issue
    number (as int or str) and year (as int or str).

    Reverses the year and number if for_url_p is True.
    """
    number_issues_per_year = get_journal_nb_issues_per_year(journal_name)
    precision = len(str(number_issues_per_year))
    number = int(str(number))
    year = int(str(year))
    if for_url_p:
        return ("%i/%0" + str(precision) + "i") % \
               (year, number)
    else:
        return ("%0" + str(precision) + "i/%i") % \
               (number, year)

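# Examples (hypothetical journal, assuming 52 issues per year and hence
# a 2-digit zero-padded number):
#
#     make_issue_number('AtlantisTimes', 7, 2009)                 -> '07/2009'
#     make_issue_number('AtlantisTimes', 7, 2009, for_url_p=True) -> '2009/07'
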
def get_release_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Gets the date at which an issue was released from the DB.

    Returns None if the issue has not yet been released.

    See issue_to_datetime() to get the *theoretical* release time of an
    issue.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        release_date = run_sql("""SELECT date_released
                                    FROM jrnISSUE
                                   WHERE issue_number=%s
                                     AND id_jrnJOURNAL=%s""",
                               (issue, journal_id))[0][0]
    except:
        return None
    if release_date:
        return release_date
    else:
        return None

def get_announcement_datetime(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Gets the date at which an issue was announced through the alert
    system.

    Returns None if not announced.
    """
    journal_id = get_journal_id(journal_name, ln)
    try:
        announce_date = run_sql("""SELECT date_announced
                                     FROM jrnISSUE
                                    WHERE issue_number=%s
                                      AND id_jrnJOURNAL=%s""",
                                (issue, journal_id))[0][0]
    except:
        return None
    if announce_date:
        return announce_date
    else:
        return None

def datetime_to_issue(issue_datetime, journal_name):
    """
    Returns the issue corresponding to the given datetime object.

    If issue_datetime is too far in the future or in the past, gives
    the best possible matching issue, or None, if it does not seem to
    exist.

    #If issue_datetime is too far in the future, return the latest
    #released issue.
    #If issue_datetime is too far in the past, return None

    Parameters:

      issue_datetime - *datetime* date of the issue to be retrieved

        journal_name - *str* the name of the journal (as used in URLs)
    """
    issue_number = None
    journal_id = get_journal_id(journal_name)

    # Try to discover how many days an issue is valid
    nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
    this_year_number_of_days = 365
    if calendar.isleap(issue_datetime.year):
        this_year_number_of_days = 366

    issue_day_lifetime = math.ceil(float(this_year_number_of_days)/nb_issues_per_year)

    res = run_sql("""SELECT issue_number, date_released
                       FROM jrnISSUE
                      WHERE date_released < %s
                        AND id_jrnJOURNAL = %s
                   ORDER BY date_released DESC LIMIT 1""",
                  (issue_datetime, journal_id))
    if res and res[0][1]:
        issue_number = res[0][0]
        issue_release_date = res[0][1]

        # Check that the result is not too far in the future:
        if issue_release_date + datetime.timedelta(issue_day_lifetime) < issue_datetime:
            # In principle, the latest issue will no longer be valid
            # at that time
            return None
    else:
        # Mmh, are we too far in the past? This can happen in the case
        # of articles that have been imported in the system but never
        # considered as 'released' in the database. So we should still
        # try to approximate/match an issue:
        if round(issue_day_lifetime) in [6, 7, 8]:
            # Weekly issues. We can use this information to better
            # match the issue number
            issue_nb = int(issue_datetime.strftime('%W')) # = week number
        else:
            # Compute the number of days since beginning of year, and
            # divide by the lifetime of an issue: we get the
            # approximate issue_number
            issue_nb = math.ceil((int(issue_datetime.strftime('%j')) / issue_day_lifetime))
        issue_number = ("%0" + str(len(str(nb_issues_per_year))) + "i/%i") % \
                       (issue_nb, issue_datetime.year)
        # Now check if this issue exists in the system for this
        # journal
        if not get_journal_categories(journal_name, issue_number):
            # This issue did not exist
            return None

    return issue_number

DAILY = 1
WEEKLY = 2
MONTHLY = 3

def issue_to_datetime(issue_number, journal_name, granularity=None):
    """
    Returns the *theoretical* date of release for given issue: useful
    if you release on Friday, but the issue date of the journal
    should correspond to the next Monday.

    This will correspond to the next day/week/month, depending on the
    number of issues per year (or the 'granularity' if specified) and
    the release time (if close to the end of a period defined by the
    granularity, consider next period since release is made a bit in
    advance).

    See get_release_datetime() for the *real* release time of an issue.

    THIS FUNCTION SHOULD ONLY BE USED FOR INFORMATIVE DISPLAY PURPOSE,
    AS IT GIVES APPROXIMATIVE RESULTS. Do not use it to make decisions.

    Parameters:

        issue_number - *str* issue number to consider

        journal_name - *str* the name of the journal (as used in URLs)

         granularity - *int* the granularity to consider
    """
    # If we have released, we can use this information. Otherwise we
    # have to approximate.
    issue_date = get_release_datetime(issue_number, journal_name)
    if not issue_date:
        # Approximate release date
        number, year = issue_number.split('/')
        number = int(number)
        year = int(year)
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        this_year_number_of_days = 365
        if calendar.isleap(year):
            this_year_number_of_days = 366
        issue_day_lifetime = float(this_year_number_of_days)/nb_issues_per_year
        # Compute from beginning of the year
        issue_date = datetime.datetime(year, 1, 1) + \
                     datetime.timedelta(days=int(round((number - 1) * issue_day_lifetime)))
        # Okay, but if last release is not too far in the past, better
        # compute from the release.
        current_issue = get_current_issue(CFG_SITE_LANG, journal_name)
        current_issue_time = get_release_datetime(current_issue, journal_name)
        if current_issue_time.year == issue_date.year:
            current_issue_number, current_issue_year = current_issue.split('/')
            current_issue_number = int(current_issue_number)
            # Compute from last release
            issue_date = current_issue_time + \
                         datetime.timedelta(days=int((number - current_issue_number) * issue_day_lifetime))

    # If granularity is not specified, deduce from config
    if granularity is None:
        nb_issues_per_year = get_journal_nb_issues_per_year(journal_name)
        if nb_issues_per_year > 250:
            granularity = DAILY
        elif nb_issues_per_year > 40:
            granularity = WEEKLY
        else:
            granularity = MONTHLY

    # Now we can adapt the date to match the granularity
    if granularity == DAILY:
        if issue_date.hour >= 15:
            # If released after 3pm, consider it is the issue of the
            # next day
            issue_date = issue_date + datetime.timedelta(days=1)
    elif granularity == WEEKLY:
        (year, week_nb, day_nb) = issue_date.isocalendar()
        if day_nb > 4:
            # If released on Fri, Sat or Sun, consider that it is next
            # week's issue.
            issue_date = issue_date + datetime.timedelta(weeks=1)
        # Get first day of the week
        issue_date = issue_date - datetime.timedelta(days=issue_date.weekday())
    else:
        if issue_date.day > 22:
            # If released during the last week of the month, consider
            # it the release for next month
            issue_date = issue_date.replace(month=issue_date.month + 1)
        date_string = issue_date.strftime("%Y %m 1")
        issue_date = datetime.datetime(*(time.strptime(date_string, "%Y %m %d")[0:6]))

    return issue_date

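# Rough example of the granularity adjustment (hypothetical dates,
# WEEKLY granularity): an issue whose computed or real release date
# falls on Friday 2009-04-17 (ISO weekday 5 > 4) is pushed to the
# following week and then snapped back to that week's Monday,
# 2009-04-20.
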
def get_number_of_articles_for_issue(issue, journal_name, ln=CFG_SITE_LANG):
    """
    Function that returns a dictionary with all categories and the
    number of articles in each category.
    """
    all_articles = {}
    categories = get_journal_categories(journal_name, issue)
    for category in categories:
        all_articles[category] = len(get_journal_articles(journal_name,
                                                          issue,
                                                          category))
    return all_articles

########################## JOURNAL RELATED ###########################
def get_journal_info_path(journal_name):
    """
    Returns the path to the info file of the given journal. The info
    file should be used to get information about a journal when the
    database is not available.

    Returns None if the path cannot be determined.
    """
    # We must make sure we don't try to read outside of webjournal
    # cache dir
    info_path = os.path.abspath("%s/webjournal/%s/info.dat" % \
                                (CFG_CACHEDIR, journal_name))
    if info_path.startswith(CFG_CACHEDIR + '/webjournal/'):
        return info_path
    else:
        return None

def get_journal_article_cache_path(journal_name, issue):
    """
    Returns the path to the cache file of the articles of a given
    issue.

    Returns None if the path cannot be determined.
    """
    # We must make sure we don't try to read outside of webjournal
    # cache dir
    cache_path = os.path.abspath("%s/webjournal/%s/%s_articles_cache.dat" % \
                                 (CFG_CACHEDIR, journal_name,
                                  issue.replace('/', '_')))
    if cache_path.startswith(CFG_CACHEDIR + '/webjournal/'):
        return cache_path
    else:
        return None

def get_journal_id(journal_name, ln=CFG_SITE_LANG):
    """
    Get the id for this journal from the DB. If the DB is down, try to
    get it from cache.
    """
    journal_id = None
    from invenio.webjournal_config import InvenioWebJournalJournalIdNotFoundDBError
    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        journal_info_path = get_journal_info_path(journal_name)
        try:
            journal_info_file = open(journal_info_path, 'r')
            journal_info = cPickle.load(journal_info_file)
            journal_id = journal_info.get('journal_id', None)
        except cPickle.PickleError, e:
            journal_id = None
        except IOError:
            journal_id = None
    else:
        try:
            res = run_sql("SELECT id FROM jrnJOURNAL WHERE name=%s",
                          (journal_name,))
            if len(res) > 0:
                journal_id = res[0][0]
        except OperationalError, e:
            # Cannot connect to database. Try to read from cache
            journal_info_path = get_journal_info_path(journal_name)
            try:
                journal_info_file = open(journal_info_path, 'r')
                journal_info = cPickle.load(journal_info_file)
                journal_id = journal_info['journal_id']
            except cPickle.PickleError, e:
                journal_id = None
            except IOError:
                journal_id = None

    if journal_id is None:
        raise InvenioWebJournalJournalIdNotFoundDBError(ln, journal_name)

    return journal_id

def guess_journal_name(ln, journal_name=None):
    """
    Tries to take a guess at what a user was looking for on the server
    if not providing a name for the journal, or if the given journal
    name does not match the case of the original journal.
    """
    from invenio.webjournal_config import InvenioWebJournalNoJournalOnServerError
    from invenio.webjournal_config import InvenioWebJournalNoNameError

    journals_id_and_names = get_journals_ids_and_names()
    if len(journals_id_and_names) == 0:
        raise InvenioWebJournalNoJournalOnServerError(ln)

    elif not journal_name and \
             journals_id_and_names[0].has_key('journal_name'):
        return journals_id_and_names[0]['journal_name']

    elif len(journals_id_and_names) > 0:
        possible_journal_names = [journal_id_and_name['journal_name'] for journal_id_and_name \
                                  in journals_id_and_names \
                                  if journal_id_and_name.get('journal_name', '').lower() == journal_name.lower()]
        if possible_journal_names:
            return possible_journal_names[0]
        else:
            raise InvenioWebJournalNoNameError(ln)

    else:
        raise InvenioWebJournalNoNameError(ln)

def get_journals_ids_and_names():
    """
    Returns the list of existing journals IDs and names. Try to read
    from the DB, or from cache if the DB is not accessible.
    """
    journals = []

    if CFG_ACCESS_CONTROL_LEVEL_SITE == 2:
        # do not connect to the database as the site is closed for
        # maintenance:
        files = os.listdir("%s/webjournal" % CFG_CACHEDIR)
        info_files = [path + os.sep + 'info.dat' for path in files if \
                      os.path.isdir(path) and \
                      os.path.exists(path + os.sep + 'info.dat')]
        for info_file in info_files:
            try:
                journal_info_file = open(info_file, 'r')
                journal_info = cPickle.load(journal_info_file)
                journal_id = journal_info.get('journal_id', None)
                journal_name = journal_info.get('journal_name', None)
                current_issue = journal_info.get('current_issue', None)
                if journal_id is not None and \
                       journal_name is not None:
                    journals.append({'journal_id': journal_id,
                                     'journal_name': journal_name,
                                     'current_issue': current_issue})
            except cPickle.PickleError, e:
                # Well, can't do anything...
                continue
            except IOError:
                # Well, can't do anything...
                continue
    else:
        try:
            res = run_sql("SELECT id, name FROM jrnJOURNAL ORDER BY id")
            for journal_id, journal_name in res:
                journals.append({'journal_id': journal_id,
                                 'journal_name': journal_name})
        except OperationalError, e:
            # Cannot connect to database. Try to read from cache
            files = os.listdir("%s/webjournal" % CFG_CACHEDIR)
            info_files = [path + os.sep + 'info.dat' for path in files if \
                          os.path.isdir(path) and \
                          os.path.exists(path + os.sep + 'info.dat')]
            for info_file in info_files:
                try:
                    journal_info_file = open(info_file, 'r')
                    journal_info = cPickle.load(journal_info_file)
                    journal_id = journal_info.get('journal_id', None)
                    journal_name = journal_info.get('journal_name', None)
                    current_issue = journal_info.get('current_issue', None)
                    if journal_id is not None and \
                           journal_name is not None:
                        journals.append({'journal_id': journal_id,
                                         'journal_name': journal_name,
                                         'current_issue': current_issue})
                except cPickle.PickleError, e:
                    # Well, can't do anything...
                    continue
                except IOError:
                    # Well, can't do anything...
                    continue

    return journals

def parse_url_string(uri):
    """
    Centralized function to parse any URL string given in
    webjournal. Useful to retrieve the current category, journal,
    etc. from within format elements.

    The webjournal interface handler should already have cleaned the
    URI beforehand, so that the journal name exists, the issue number
    is correct, etc. The only remaining problem might be due to the
    capitalization of the journal name in contact, search and popup
    pages, so clean the journal name.

    returns:
        args: all arguments in dict form
    """
    args = {'journal_name'  : '',
            'issue_year'    : '',
            'issue_number'  : None,
            'issue'         : None,
            'category'      : '',
            'recid'         : -1,
            'verbose'       : 0,
            'ln'            : CFG_SITE_LANG,
            'archive_year'  : None,
            'archive_search': ''}

    if not uri.startswith('/journal'):
        # Mmh, incorrect context. Still, keep language if available
        url_params = urlparse(uri)[4]
        args['ln'] = dict([part.split('=') for part in url_params.split('&') \
                           if len(part.split('=')) == 2]).get('ln', CFG_SITE_LANG)
        return args

    # Take everything after journal and before first question mark
    splitted_uri = uri.split('journal', 1)
    second_part = splitted_uri[1]
    splitted_uri = second_part.split('?')
    uri_middle_part = splitted_uri[0]
    uri_arguments = ''
    if len(splitted_uri) > 1:
        uri_arguments = splitted_uri[1]

    arg_list = uri_arguments.split("&")
    args['ln'] = CFG_SITE_LANG
    args['verbose'] = 0
    for arg_pair in arg_list:
        arg_and_value = arg_pair.split('=')
        if len(arg_and_value) == 2:
            if arg_and_value[0] == 'ln':
                args['ln'] = arg_and_value[1]
            elif arg_and_value[0] == 'verbose' and \
                     arg_and_value[1].isdigit():
                args['verbose'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_year' and \
                     arg_and_value[1].isdigit():
                args['archive_year'] = int(arg_and_value[1])
            elif arg_and_value[0] == 'archive_search':
                args['archive_search'] = arg_and_value[1]
            elif arg_and_value[0] == 'name':
                args['journal_name'] = guess_journal_name(args['ln'],
                                                          arg_and_value[1])

    arg_list = uri_middle_part.split("/")
    if len(arg_list) > 1 and arg_list[1] not in ['search', 'contact', 'popup']:
        args['journal_name'] = urllib.unquote(arg_list[1])
    elif arg_list[1] not in ['search', 'contact', 'popup']:
        args['journal_name'] = guess_journal_name(args['ln'],
                                                  args['journal_name'])

    cur_issue = get_current_issue(args['ln'], args['journal_name'])
    if len(arg_list) > 2:
        try:
            args['issue_year'] = int(urllib.unquote(arg_list[2]))
        except:
            args['issue_year'] = int(cur_issue.split('/')[1])
    else:
        args['issue'] = cur_issue
        args['issue_year'] = int(cur_issue.split('/')[1])
        args['issue_number'] = int(cur_issue.split('/')[0])

    if len(arg_list) > 3:
        try:
            args['issue_number'] = int(urllib.unquote(arg_list[3]))
        except:
            args['issue_number'] = int(cur_issue.split('/')[0])
        args['issue'] = make_issue_number(args['journal_name'],
                                          args['issue_number'],
                                          args['issue_year'])

    if len(arg_list) > 4:
        args['category'] = urllib.unquote(arg_list[4])
    if len(arg_list) > 5:
        try:
            args['recid'] = int(urllib.unquote(arg_list[5]))
        except:
            pass

    args['ln'] = get_journal_preferred_language(args['journal_name'],
                                                args['ln'])

    # FIXME : wash arguments?
    return args

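# Sketch of the parsing (the URI and names are hypothetical):
#
#     parse_url_string('/journal/AtlantisTimes/2009/03/News/2390?ln=en')
#
# would yield, among others, args['journal_name'] == 'AtlantisTimes',
# args['issue_year'] == 2009, args['issue_number'] == 3,
# args['issue'] == '03/2009', args['category'] == 'News' and
# args['recid'] == 2390 (the final 'ln' being filtered through
# get_journal_preferred_language()).
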
def make_journal_url(current_uri, custom_parameters=None):
    """
    Creates a URL, using the current URI and overriding values
    with the given custom_parameters.

    Parameters:

        current_uri - *str* the current full URI

        custom_parameters - *dict* a dictionary of parameters that
                            should override those of current_uri
    """
    if not custom_parameters:
        custom_parameters = {}

    default_params = parse_url_string(current_uri)
    for key, value in custom_parameters.iteritems():
        # Override default params with custom params
        default_params[key] = str(value)

    uri = CFG_SITE_URL + '/journal/'
    if default_params['journal_name']:
        uri += urllib.quote(default_params['journal_name']) + '/'
        if default_params['issue_year'] and default_params['issue_number']:
            uri += make_issue_number(default_params['journal_name'],
                                     default_params['issue_number'],
                                     default_params['issue_year'],
                                     for_url_p=True) + '/'
            if default_params['category']:
                uri += urllib.quote(default_params['category'])
                if default_params['recid'] and \
                       default_params['recid'] != -1:
                    uri += '/' + str(default_params['recid'])

    printed_question_mark = False
    if default_params['ln']:
        uri += '?ln=' + default_params['ln']
        printed_question_mark = True

    if default_params['verbose'] != 0:
        if printed_question_mark:
            uri += '&verbose=' + str(default_params['verbose'])
        else:
            uri += '?verbose=' + str(default_params['verbose'])

    return uri

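# Example (hypothetical values, and assuming 'en' is among the journal
# languages so that the 'ln' argument survives the preferred-language
# filtering in parse_url_string()):
#
#     make_journal_url('/journal/AtlantisTimes/2009/03/News?ln=en',
#                      {'category': 'Science'})
#
# would rebuild CFG_SITE_URL + '/journal/AtlantisTimes/2009/03/Science?ln=en',
# the issue part being re-emitted via make_issue_number(for_url_p=True).
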
############################ HTML CACHING FUNCTIONS ############################
def cache_index_page(html, journal_name, category, issue, ln):
    """
    Caches the index page main area of a Bulletin
    (right hand menu cannot be cached).
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")

    cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  issue, category, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop caching
        return False
    cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name)
    if not os.path.isdir(cache_path_dir):
        os.makedirs(cache_path_dir)
    cached_file = open(cache_path, "w")
    cached_file.write(html)
    cached_file.close()

def get_index_page_from_cache(journal_name, category, issue, ln):
    """
    Function to get an index page from the cache.

    Returns False if not in cache.
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")

    cache_path = os.path.abspath('%s/webjournal/%s/%s_index_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  issue, category, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop reading cache
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    return cached_file

def cache_article_page(html, journal_name, category, recid, issue, ln):
    """
    Caches an article view of a journal.
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")

    cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  issue, category, recid, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop caching
        return
    cache_path_dir = '%s/webjournal/%s' % (CFG_CACHEDIR, journal_name)
    if not os.path.isdir(cache_path_dir):
        os.makedirs(cache_path_dir)
    cached_file = open(cache_path, "w")
    cached_file.write(html)
    cached_file.close()

def get_article_page_from_cache(journal_name, category, recid, issue, ln):
    """
    Gets an article view of a journal from cache.

    Returns False if not in cache.
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")

    cache_path = os.path.abspath('%s/webjournal/%s/%s_article_%s_%s_%s.html' % \
                                 (CFG_CACHEDIR, journal_name,
                                  issue, category, recid, ln))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop reading cache
        return False
    try:
        cached_file = open(cache_path).read()
    except:
        return False
    return cached_file

def clear_cache_for_article(journal_name, category, recid, issue):
    """
    Resets the cache for an article (e.g. after an article has been
    modified).
    """
    issue = issue.replace("/", "_")
    category = category.replace(" ", "")

    cache_path = os.path.abspath('%s/webjournal/%s/' %
                                 (CFG_CACHEDIR, journal_name))
    if not cache_path.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False

    # try to delete the article cached file
    try:
        os.remove('%s/webjournal/%s/%s_article_%s_%s_en.html' %
                  (CFG_CACHEDIR, journal_name, issue, category, recid))
    except:
        pass
    try:
        os.remove('%s/webjournal/%s/%s_article_%s_%s_fr.html' %
                  (CFG_CACHEDIR, journal_name, issue, category, recid))
    except:
        pass

    # delete the index page for the category
    try:
        os.remove('%s/webjournal/%s/%s_index_%s_en.html' %
                  (CFG_CACHEDIR, journal_name, issue, category))
    except:
        pass
    try:
        os.remove('%s/webjournal/%s/%s_index_%s_fr.html' %
                  (CFG_CACHEDIR, journal_name, issue, category))
    except:
        pass

    try:
        path = get_journal_article_cache_path(journal_name, issue)
        os.remove(path)
    except:
        pass

    return True

def clear_cache_for_issue(journal_name, issue):
    """
    Clears the cache of a whole issue.
    """
    issue = issue.replace("/", "_")
    cache_path_dir = os.path.abspath('%s/webjournal/%s' % \
                                     (CFG_CACHEDIR, journal_name))
    if not cache_path_dir.startswith(CFG_CACHEDIR + '/webjournal'):
        # Mmh, not accessing correct path. Stop deleting cache
        return False

    all_cached_files = os.listdir(cache_path_dir)
    non_deleted = []
    for cached_file in all_cached_files:
        if cached_file.startswith(issue.replace('/', '_')):
            try:
                os.remove(cache_path_dir + '/' + cached_file)
            except:
                return False
        else:
            non_deleted.append(cached_file)

    return True

######################### CERN SPECIFIC FUNCTIONS #################
def get_recid_from_legacy_number(issue_number, category, number):
    """
    Returns the recid based on the issue number, category and
    'number'.

    This is used to support URLs using the now deprecated 'number'
    argument. The function tries to reproduce the behaviour of the
    old way of doing, even keeping some of its 'problems' (so that we
    reach the same article as before with a given number).

    Returns the recid as int, or -1 if not found.
    """
    recids = []
    if issue_number[0] == "0":
        alternative_issue_number = issue_number[1:]
        recids = list(search_pattern(p='65017a:"%s" and 773__n:%s' %
                                     (category, issue_number)))
        recids.extend(list(search_pattern(p='65017a:"%s" and 773__n:%s' %
                                          (category, alternative_issue_number))))
    else:
        recids = list(search_pattern(p='65017:"%s" and 773__n:%s' %
                                     (category, issue_number)))

    # Now must order the records and pick the one at index 'number'.
    # But we have to take into account that there can be multiple
    # records at position 1, and that these additional records should
    # be numbered with negative numbers:
    # 1, 1, 1, 2, 3 -> 1, -1, -2, 2, 3...
    negative_index_records = {}
    positive_index_records = {}
    # Fill in 'negative_index_records' and 'positive_index_records'
    # lists with the following loop
    for recid in recids:
        bfo = BibFormatObject(recid)
        order = [subfield['c'] for subfield in bfo.fields('773__') if \
                 issue_number in subfield['n']]

        if len(order) > 0:
            # If several orders are defined for the same article and
            # the same issue, keep the first one
            order = order[0]
            if order.isdigit():
                # Order must be an int. Otherwise skip
                order = int(order)
                if order == 1 and positive_index_records.has_key(1):
                    # This is then a negative number for this record
                    index = (len(negative_index_records.keys()) > 0 and \
                             min(negative_index_records.keys()) - 1) or 0
                    negative_index_records[index] = recid
                else:
                    # Positive number for this record
                    if not positive_index_records.has_key(order):
                        positive_index_records[order] = recid
                    else:
                        # We make the assumption that we cannot have
                        # twice the same position for two
                        # articles. Previous WebJournal module was not
                        # clear about that. Just drop this record
                        # (better than crashing or looping forever..)
                        pass

    recid_to_return = -1
    # Ok, we can finally pick the recid corresponding to 'number'
    if number <= 0:
        negative_indexes = negative_index_records.keys()
        negative_indexes.sort()
        negative_indexes.reverse()
        if len(negative_indexes) > abs(number):
            recid_to_return = negative_index_records[negative_indexes[abs(number)]]
    else:
        if positive_index_records.has_key(number):
            recid_to_return = positive_index_records[number]
    return recid_to_return

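# Worked example of the numbering scheme (hypothetical records): if
# four records match the category and issue with 773__c orders
# [1, 1, 1, 2], the first order-1 record encountered keeps positive
# position 1 and the two extra order-1 records are stored under
# indexes 0 and -1. A legacy 'number' of 1 or 2 then resolves through
# positive_index_records, while 0 and -1 resolve through
# negative_index_records, matching the 1, -1, -2, 2, 3... behaviour
# described above.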