dump_infoscience_auth.py
No OneTemporary
Actions

Subscribers

None

File Metadata

	#!/usr/bin/env python3

	import xml.etree.ElementTree as E
	import json
	import requests
	import configparser


	def getApiKey():
	    """Return the API token stored in ``config.ini``.

	    The file is looked up relative to the current working directory and the
	    token is taken from the ``api`` option of the ``infoscience_prod``
	    section.

	    Raises:
	        KeyError: if the section or the option is missing (a missing file
	            is silently skipped by ``ConfigParser.read`` and ends up here).
	    """
	    parser = configparser.ConfigParser()
	    parser.read('config.ini')
	    section = parser['infoscience_prod']
	    return section['api']


	def get_auth_records(params, f):
	    """Download all records matching *params* into ``auth_data/<f>.xml``.

	    The search endpoint (module-level ``URL``) is queried repeatedly, passing
	    the ``search_id`` returned by the previous reply back to the server, until
	    a reply contains no records. All records are then written as a single
	    well-formed document: one XML declaration and one ``<collection>`` root in
	    the MARC21 slim namespace. If nothing was found, the file holds an empty
	    collection.

	    Args:
	        params: query parameters for the search request. The dict is copied,
	            so the caller's object is never modified.
	        f: base name (without extension) of the output file.

	    Raises:
	        requests.HTTPError: if the server answers with an error status.
	        xml.etree.ElementTree.ParseError: if a reply is not parseable XML.
	    """
	    import os

	    namespace_attr = 'xmlns="http://www.loc.gov/MARC21/slim"'
	    header = ('<?xml version="1.0" encoding="UTF-8" ?>\n'
	              '<collection %s>' % namespace_attr)
	    footer = "</collection>"

	    # Work on a copy so the continuation token does not leak to the caller.
	    query = dict(params)
	    chunks = []
	    while True:
	        r = requests.get(URL,
	                         params=query,
	                         headers={
	                             "content-type": "application/xml",
	                             "Authorization": "Token {}".format(API_KEY)
	                         })
	        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
	        r.raise_for_status()

	        # Strip the default namespace so the plain tag names used in find()
	        # below match; it is re-declared once on the output root element.
	        response = E.fromstring(r.text.replace(namespace_attr, ''))

	        collection = response.find('collection')
	        records = list(collection) if collection is not None else []
	        if not records:
	            break
	        chunks.extend(E.tostring(record).decode("utf-8")
	                      for record in records)

	        token_node = response.find('search_id')
	        token = token_node.text if token_node is not None else None
	        if token:
	            query['search_id'] = token
	        elif 'search_id' not in query:
	            # Without any continuation token the next request would be
	            # identical to this one and presumably return the same page
	            # forever, so stop after the first page.
	            break

	    os.makedirs("auth_data", exist_ok=True)
	    path = "auth_data/" + f + ".xml"
	    # Encoding must match the one announced in the XML declaration.
	    with open(path, 'w', encoding="utf-8") as fh:
	        fh.write(header)
	        fh.writelines(chunks)
	        fh.write(footer)
	    print("authority file " + path + " created")


	def getAuthTypes():
	    """Return the children of the ``Authorities`` collection.

	    Queries the collections API with the module-level ``API_KEY`` and returns
	    whatever the JSON reply stores under ``['Authorities']['children']``.
	    The caller iterates over it and uses each item as a collection name.

	    Raises:
	        requests.HTTPError: if the API answers with an error status.
	        KeyError: if the reply lacks the expected keys.
	    """
	    collection_api = "https://infoscience.epfl.ch/api/v1/collections?c=Authorities"

	    response = requests.get(collection_api,
	                            headers={
	                                "content-type": "application/json",
	                                "Authorization": "Token {}".format(API_KEY)
	                            })
	    # Surface auth/server failures directly rather than as a JSON or
	    # KeyError further down.
	    response.raise_for_status()

	    # Named "payload" so the imported json module is not shadowed.
	    payload = response.json()
	    return payload['Authorities']['children']

	# Search endpoint queried by get_auth_records().
	URL = "https://infoscience.epfl.ch/api/v1/search"
	# Token sent in the Authorization header of every request; read once at
	# start-up from config.ini.
	API_KEY = getApiKey()

	authorityTypes = getAuthTypes()

	# Dump each authority collection to its own XML file; the collection name is
	# used both as the search filter and as the output file name.
	for auttype in authorityTypes:
	    params = dict(format='xml', c=auttype)
	    get_auth_records(params, auttype)