Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90674796
bibrank_downloads_similarity.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 3, 19:05
Size
4 KB
Mime Type
text/x-python
Expires
Tue, Nov 5, 19:05 (2 d)
Engine
blob
Format
Raw Data
Handle
22118017
Attached To
R3600 invenio-infoscience
bibrank_downloads_similarity.py
View Options
# -*- coding: utf-8 -*-
##
## This file is part of Invenio.
## Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 CERN.
##
## Invenio is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
__revision__
=
\
"$Id$"
from
invenio.config
import
\
CFG_ACCESS_CONTROL_LEVEL_SITE
,
\
CFG_CERN_SITE
from
invenio.dbquery
import
run_sql
from
invenio.bibrank_downloads_indexer
import
database_tuples_to_single_list
def
get_fieldvalues
(
recID
,
tag
):
"""Return list of field values for field TAG inside record RECID.
Copy from search_engine"""
out
=
[]
if
tag
==
"001___"
:
# we have asked for recID that is not stored in bibXXx tables
out
.
append
(
str
(
recID
))
else
:
# we are going to look inside bibXXx tables
digit
=
tag
[
0
:
2
]
bx
=
"bib
%s
x"
%
digit
bibx
=
"bibrec_bib
%s
x"
%
digit
query
=
"SELECT bx.value FROM
%s
AS bx,
%s
AS bibx "
\
" WHERE bibx.id_bibrec='
%s
' AND bx.id=bibx.id_bibxxx AND "
\
" bx.tag LIKE '
%s
' ORDER BY bibx.field_number, bx.tag ASC"
%
\
(
bx
,
bibx
,
recID
,
tag
)
res
=
run_sql
(
query
)
for
row
in
res
:
out
.
append
(
row
[
0
])
return
out
def
record_exists
(
recID
):
"""Return 1 if record RECID exists.
Return 0 if it doesn't exist.
Return -1 if it exists but is marked as deleted.
Copy from search_engine"""
out
=
0
query
=
"SELECT id FROM bibrec WHERE id='
%s
'"
%
recID
res
=
run_sql
(
query
,
None
,
1
)
if
res
:
# record exists; now check whether it isn't marked as deleted:
dbcollids
=
get_fieldvalues
(
recID
,
"980__%"
)
if
(
"DELETED"
in
dbcollids
)
or
(
CFG_CERN_SITE
and
"DUMMY"
in
dbcollids
):
out
=
-
1
# exists, but marked as deleted
else
:
out
=
1
# exists fine
return
out
### INTERFACE
def
register_page_view_event
(
recid
,
uid
,
client_ip_address
):
"""Register Detailed record page view event for record RECID
consulted by user UID from machine CLIENT_HOST_IP.
To be called by the search engine.
"""
if
CFG_ACCESS_CONTROL_LEVEL_SITE
>=
1
:
# do not register access if we are in read-only access control
# site mode:
return
[]
return
run_sql
(
"INSERT INTO rnkPAGEVIEWS "
\
" (id_bibrec,id_user,client_host,view_time) "
\
" VALUES (
%s
,
%s
,INET_ATON(
%s
),NOW())"
,
\
(
recid
,
uid
,
client_ip_address
))
def
calculate_reading_similarity_list
(
recid
,
type
=
"pageviews"
):
"""Calculate reading similarity data to use in reading similarity
boxes (``people who downloaded/viewed this file/page have also
downloaded/viewed''). Return list of (recid1, score1),
(recid2,score2), ... for all recidN that were consulted by the
same people who have also consulted RECID. The reading
similarity TYPE can be either `pageviews' or `downloads',
depending whether we want to obtain page view similarity or
download similarity.
"""
if
type
==
"downloads"
:
tablename
=
"rnkDOWNLOADS"
else
:
# default
tablename
=
"rnkPAGEVIEWS"
# firstly compute the set of client hosts who consulted recid:
client_host_list
=
run_sql
(
"SELECT DISTINCT(client_host)"
+
\
" FROM "
+
tablename
+
\
" WHERE id_bibrec=
%s
"
+
\
" AND client_host IS NOT NULL"
,
(
recid
,))
# secondly look up all recids that were consulted by these client hosts,
# and order them by the number of different client hosts reading them:
res
=
[]
if
client_host_list
!=
():
client_host_list
=
str
(
database_tuples_to_single_list
(
client_host_list
))
client_host_list
=
client_host_list
.
replace
(
"L"
,
""
)
client_host_list
=
client_host_list
.
replace
(
"["
,
""
)
client_host_list
=
client_host_list
.
replace
(
"]"
,
""
)
res
=
run_sql
(
"SELECT id_bibrec,COUNT(DISTINCT(client_host)) AS c"
\
" FROM "
+
tablename
+
\
" WHERE client_host IN ("
+
client_host_list
+
")"
+
\
" AND id_bibrec !=
%s
"
\
" GROUP BY id_bibrec ORDER BY c DESC LIMIT 10"
,
(
recid
,))
return
res
Event Timeline
Log In to Comment