Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F91206970
dump_infoscience_auth.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, Nov 8, 23:41
Size
1 KB
Mime Type
text/x-python
Expires
Sun, Nov 10, 23:41 (2 d)
Engine
blob
Format
Raw Data
Handle
22220154
Attached To
rINFOSCIENCEAUTH Infoscience_Authorities
dump_infoscience_auth.py
View Options
#!/usr/bin/env python3
import
xml.etree.ElementTree
as
E
import
json
import
requests
import
configparser
def getApiKey():
    """Return the API token stored in ``config.ini``.

    Reads ``config.ini`` relative to the current working directory and
    returns the ``api`` option of its ``[infoscience_prod]`` section.

    Raises:
        KeyError: if the section or the option is absent. A missing file
            surfaces the same way, because ``ConfigParser.read`` skips
            files it cannot open instead of raising.
    """
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    prod_section = parser['infoscience_prod']
    return prod_section['api']
def get_auth_records(params, f):
    """Download all records matching *params* and write them to ``auth_data/<f>.xml``.

    The search endpoint ``URL`` is queried repeatedly. Each reply is parsed
    as XML; the text of its ``search_id`` element is sent back as the
    ``search_id`` query parameter of the next request, and the children of
    its ``collection`` element are the records of that page. Paging stops
    at the first page without records.

    Every record is written wrapped in its own XML declaration and
    ``<collection>`` element, so the output file is a concatenation of
    single-record MARCXML collections.

    Args:
        params: query parameters for the search request. The mapping is
            copied, so the caller's object is left untouched.
        f: base name (without extension) of the output file, created as
            ``auth_data/<f>.xml``.

    Returns:
        None. The result is the file on disk plus a confirmation line
        printed to stdout.
    """
    # Function-scope import: the file's import block does not provide ``os``.
    import os

    # Copy so that adding 'search_id' below does not leak into the caller's dict.
    query = dict(params)
    request_headers = {
        "content-type": "application/xml",
        "Authorization": "Token {}".format(API_KEY),
    }
    header = (
        '<?xml version="1.0" encoding="UTF-8" ?>\n'
        '<collection xmlns="http://www.loc.gov/MARC21/slim">'
    )

    # Collect the pieces in a list and join once; repeated ``+=`` on a
    # growing string is quadratic in the worst case.
    chunks = []
    while True:
        r = requests.get(URL, params=query, headers=request_headers)
        # Drop the default namespace declaration so that find() below can
        # use bare tag names.
        response = E.fromstring(
            r.text.replace('xmlns="http://www.loc.gov/MARC21/slim"', ''))

        # find() returns None when the element is absent; treat that like
        # an empty page instead of crashing in list(None).
        collection = response.find('collection')
        records = list(collection) if collection is not None else []
        if not records:
            break

        for record in records:
            serialized = E.tostring(record).decode("utf-8")
            chunks.append("\n%s%s\n</collection>" % (header, serialized))

        # Without a continuation token the next request would be identical
        # to this one and return the same page again, so stop here.
        token = response.find('search_id')
        search_id = token.text if token is not None else None
        if not search_id:
            break
        query['search_id'] = search_id

    path = "auth_data/" + f + ".xml"
    # Create the target directory if needed so that a long download is not
    # lost to a FileNotFoundError at the very end.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The context manager closes the handle even if write() fails; the
    # encoding matches the one announced in the XML declaration.
    with open(path, 'w', encoding="utf-8") as fh:
        fh.write("".join(chunks))
    print("authority file auth_data/" + f + ".xml created")
def getAuthTypes():
    """Return the children of the "Authorities" collection.

    Sends a GET request, authenticated with ``API_KEY``, to the
    collections endpoint and returns the value found under
    ``['Authorities']['children']`` in the decoded JSON reply.
    Presumably this is a list of collection names (the script iterates
    over it and uses each item as a file name) -- verify against the API.
    """
    endpoint = "https://infoscience.epfl.ch/api/v1/collections?c=Authorities"
    request_headers = {
        "content-type": "application/json",
        "Authorization": "Token {}".format(API_KEY),
    }
    payload = requests.get(endpoint, headers=request_headers).json()
    return payload['Authorities']['children']
# Search endpoint queried by get_auth_records().
URL = "https://infoscience.epfl.ch/api/v1/search"
# Token read from config.ini; sent in the Authorization header of every request.
API_KEY = getApiKey()

# Dump each child collection of "Authorities" into its own XML file.
authorityTypes = getAuthTypes()
for auttype in authorityTypes:
    params = dict(format='xml', c=auttype)
    get_auth_records(params, auttype)
Event Timeline
Log In to Comment