Page MenuHomec4science

dump_infoscience_auth.py
No OneTemporary

File Metadata

Created
Tue, Apr 23, 14:48

dump_infoscience_auth.py

#!/usr/bin/env python3
import xml.etree.ElementTree as E
import json
import requests
import configparser
def getApiKey():
    """Return the API token stored in ``config.ini``.

    The token is read from the ``api`` option of the ``[infoscience_prod]``
    section; ``config.ini`` is resolved relative to the current working
    directory.

    Raises:
        KeyError: If the section or the option is absent (this also happens
            when the file is missing, because ``ConfigParser.read`` skips
            unreadable files silently).
    """
    parser = configparser.ConfigParser()
    parser.read('config.ini')
    prod_section = parser['infoscience_prod']
    return prod_section['api']
def get_auth_records(params, f):
    """Download all records matching ``params`` into ``auth_data/<f>.xml``.

    The search endpoint (module-level ``URL``) is queried repeatedly. Each
    reply carries a ``search_id`` that is sent back with the next request,
    and the loop stops at the first reply whose ``collection`` element holds
    no records (or when no ``search_id`` is returned to continue with).

    Args:
        params: Query parameters for the search request. The mapping is
            copied, so the caller's dict is left untouched.
        f: Base name (without extension) of the output file.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        xml.etree.ElementTree.ParseError: If a reply is not well-formed XML.
    """
    import os

    namespace_attr = 'xmlns="http://www.loc.gov/MARC21/slim"'
    header = ('<?xml version="1.0" encoding="UTF-8" ?>\n'
              '<collection xmlns="http://www.loc.gov/MARC21/slim">')

    query = dict(params)  # work on a copy; the cursor is added below
    chunks = []
    while True:
        r = requests.get(URL,
                         params=query,
                         headers={
                             "content-type": "application/xml",
                             "Authorization": "Token {}".format(API_KEY)
                         })
        # Fail loudly on HTTP errors instead of parsing an error page as XML.
        r.raise_for_status()

        # The default namespace is stripped so that the plain tag names
        # used with find()/findtext() below match; the header written in
        # front of each record declares the same namespace again.
        response = E.fromstring(r.text.replace(namespace_attr, ''))

        collection = response.find('collection')
        records = [] if collection is None else list(collection)
        if not records:
            break

        # NOTE(review): every record is written as its own
        # header + <collection> document, as in the original code, so the
        # output file is a concatenation of XML documents. Confirm that the
        # consumer expects this before merging them into one collection.
        for record in records:
            serialized = E.tostring(record).decode("utf-8")
            chunks.append("%s%s</collection>" % (header, serialized))

        search_id = response.findtext('search_id')
        if not search_id:
            # Without a cursor the next request would repeat this very
            # query and the loop would never terminate.
            break
        query['search_id'] = search_id

    path = "auth_data/" + f + ".xml"
    os.makedirs(os.path.dirname(path), exist_ok=True)
    # The header declares UTF-8, so write the file with that encoding
    # rather than the platform default.
    with open(path, 'w', encoding="utf-8") as fh:
        fh.write("".join(chunks))
    print("authority file auth_data/"+f+".xml created")
def getAuthTypes():
    """Fetch the children of the ``Authorities`` collection.

    Sends an authenticated GET request (using the module-level ``API_KEY``)
    to the collections endpoint and returns the value found under
    ``['Authorities']['children']`` of the JSON reply.

    Returns:
        The ``children`` entry of the ``Authorities`` collection. The caller
        iterates over it and uses each item as a collection name --
        presumably a list of strings; verify against the API.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        KeyError: If the reply lacks ``Authorities`` or ``children``.
    """
    collection_api = "https://infoscience.epfl.ch/api/v1/collections?c=Authorities"
    response = requests.get(collection_api,
                            headers={
                                "content-type": "application/json",
                                "Authorization": "Token {}".format(API_KEY)
                            })
    # Surface HTTP errors here rather than as a confusing JSON/KeyError.
    response.raise_for_status()
    # Named ``payload`` so the imported ``json`` module is not shadowed.
    payload = response.json()
    return payload['Authorities']['children']
# Search endpoint queried by get_auth_records().
URL = "https://infoscience.epfl.ch/api/v1/search"
# API token loaded from config.ini; read by the request helpers above.
API_KEY = getApiKey()

# Dump one XML file per authority collection reported by the API.
authorityTypes = getAuthTypes()
for auttype in authorityTypes:
    params = dict(format='xml', c=auttype)
    get_auth_records(params, auttype)

Event Timeline