diff --git a/import_scripts/01_oacct_countries.md b/import_scripts/01_oacct_countries.md deleted file mode 100644 index 1aa14a4e..00000000 --- a/import_scripts/01_oacct_countries.md +++ /dev/null @@ -1,587 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - -## Table Countries - - -```python -# La table a été corrigée pour ajouter la valeur manquante à la fin : -# International Agency International Agency OI INT 999 -country = pd.read_csv('iso_3166.txt', encoding='utf-8', header=0, sep='\t', na_filter=False) -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
English short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
0AfghanistanAfghanistan (l')AFAFG4
1AlbaniaAlbanie (l')ALALB8
2AlgeriaAlgérie (l')DZDZA12
3American SamoaSamoa américaines (les)ASASM16
4AndorraAndorre (l')ADAND20
..................
245YemenYémen (le)YEYEM887
246ZambiaZambie (la)ZMZMB894
247ZimbabweZimbabwe (le)ZWZWE716
248Åland IslandsÅland(les Îles)AXALA248
249International AgencyInternational AgencyOIINT999
-

250 rows × 5 columns

-
- - - - -```python -country.loc[country['Alpha-2 code'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - -
English short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
-
- - - - -```python -# convertir l'index en id -country = country.reset_index() -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexEnglish short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
00AfghanistanAfghanistan (l')AFAFG4
11AlbaniaAlbanie (l')ALALB8
22AlgeriaAlgérie (l')DZDZA12
33American SamoaSamoa américaines (les)ASASM16
44AndorraAndorre (l')ADAND20
.....................
245245YemenYémen (le)YEYEM887
246246ZambiaZambie (la)ZMZMB894
247247ZimbabweZimbabwe (le)ZWZWE716
248248Åland IslandsÅland(les Îles)AXALA248
249249International AgencyInternational AgencyOIINT999
-

250 rows × 6 columns

-
- - - - -```python -country['id'] = country['index'] + 1 -del country['index'] -del country['French short name'] -del country['Alpha-3 code'] -del country['Numeric'] -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
English short nameAlpha-2 codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
245YemenYE246
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
-

250 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -country = country.rename(columns={'Alpha-2 code' : 'iso_code', 'English short name' : 'name'}) -``` - - -```python -# ajout de la valeur UNKNOWN -country = country.append({'id' : 999999, 'iso_code' : '__', 'name' : 'UNKNOWN'}, ignore_index=True) -``` - - -```python -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# esport JSON -result = country.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/country.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -country.to_csv('sample/country.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export csv -country.to_csv('country.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -country.to_excel('sample/country.xlsx', index=False) -``` diff --git a/import_scripts/01_oacct_countries.py b/import_scripts/01_oacct_countries.py deleted file mode 100644 index 5f4ff631..00000000 --- a/import_scripts/01_oacct_countries.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# ## Table Countries - -# In[2]: - - -# La table a été corrigée pour ajouter la valeur manquante à la fin : -# International Agency International Agency OI INT 999 -country = pd.read_csv('iso_3166.txt', encoding='utf-8', header=0, sep='\t', na_filter=False) -country - - -# In[3]: - - -country.loc[country['Alpha-2 code'].isnull()] - - -# In[4]: - - -# convertir l'index en id -country = country.reset_index() -country - - -# In[5]: - - -country['id'] = country['index'] + 1 -del country['index'] -del country['French short name'] -del country['Alpha-3 code'] -del country['Numeric'] -country - - -# In[6]: - - -# renommer les colonnes -country = country.rename(columns={'Alpha-2 code' : 'iso_code', 'English short name' : 'name'}) - - -# In[7]: - - -# ajout de la valeur UNKNOWN -country = country.append({'id' : 999999, 'iso_code' : '__', 'name' : 'UNKNOWN'}, ignore_index=True) - - -# In[8]: - - -country - - -# In[9]: - - -# esport JSON -result = country.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/country.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[10]: - - -# export csv -country.to_csv('sample/country.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[11]: - - -# export csv -country.to_csv('country.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[12]: - - -# export excel -country.to_excel('sample/country.xlsx', index=False) - diff --git a/import_scripts/02_oacct_languages.md b/import_scripts/02_oacct_languages.md deleted file mode 100644 index efcffbdd..00000000 --- a/import_scripts/02_oacct_languages.md +++ /dev/null @@ -1,694 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration 
avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - -## Table Language - - -```python -# https://www.loc.gov/standards/iso639-2/php/code_list.php -# ISO 639-2 Code ISO 639-1 Code English name of Language French name of Language German name of Language -language = pd.read_csv('ISO-639-2_utf-8.txt', encoding='utf-8', header=None, sep='|', na_filter=False, names=['ISO 639-2 Code', 'ISO 639-1 Code', 'ignore', 'English name of Language', 'French name of Language'], index_col=False) -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
0aaraaAfarafar
1abkabAbkhazianabkhaze
2aceAchineseaceh
3achAcoliacoli
4adaAdangmeadangme
..................
482zndZande languageszandé, langues
483zulzuZuluzoulou
484zunZunizuni
485zxxNo linguistic content; Not applicablepas de contenu linguistique; non applicable
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazakizaza; dimili; dimli; kirdki; kirmanjki; zazaki
-

487 rows × 5 columns

-
- - - - -```python -language.loc[language['ISO 639-2 Code'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - -
ISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
-
- - - - -```python -# convertir l'index en id -language = language.reset_index() -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
00aaraaAfarafar
11abkabAbkhazianabkhaze
22aceAchineseaceh
33achAcoliacoli
44adaAdangmeadangme
.....................
482482zndZande languageszandé, langues
483483zulzuZuluzoulou
484484zunZunizuni
485485zxxNo linguistic content; Not applicablepas de contenu linguistique; non applicable
486486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazakizaza; dimili; dimli; kirdki; kirmanjki; zazaki
-

487 rows × 6 columns

-
- - - - -```python -language['id'] = language['index'] + 1 -del language['index'] -del language['ignore'] -del language['French name of Language'] -del language['ISO 639-1 Code'] -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISO 639-2 CodeEnglish name of Languageid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
482zndZande languages483
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
-

487 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -language = language.rename(columns={'ISO 639-2 Code' : 'iso_code', 'English name of Language' : 'name'}) -``` - - -```python -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
482zndZande languages483
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
-

487 rows × 3 columns

-
- - - - -```python -# corriger la valeur trop longue qaa-qtz -language.loc[language['iso_code'] == 'qaa-qtz', 'iso_code'] = 'qaa' -``` - - -```python -# ajout de la valeur UNKNOWN -language = language.append({'id' : 999999, 'iso_code' : '___', 'name' : 'UNKNOWN'}, ignore_index=True) -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
487___UNKNOWN999999
-

488 rows × 3 columns

-
- - - - -```python -# esport JSON -result = language.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/language.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -language.to_csv('language.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export csv -language.to_csv('sample/language.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -language.to_excel('sample/language.xlsx', index=False) -``` diff --git a/import_scripts/02_oacct_languages.py b/import_scripts/02_oacct_languages.py deleted file mode 100644 index 7f859fdc..00000000 --- a/import_scripts/02_oacct_languages.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# ## Table Language - -# In[2]: - - -# https://www.loc.gov/standards/iso639-2/php/code_list.php -# ISO 639-2 Code ISO 639-1 Code English name of Language French name of Language German name of Language -language = pd.read_csv('ISO-639-2_utf-8.txt', encoding='utf-8', header=None, sep='|', na_filter=False, names=['ISO 639-2 Code', 'ISO 639-1 Code', 'ignore', 'English name of Language', 'French name of Language'], index_col=False) -language - - -# In[3]: - - -language.loc[language['ISO 639-2 Code'].isnull()] - - -# In[4]: - - -# convertir l'index en id -language = language.reset_index() -language - - -# In[5]: - - -language['id'] = language['index'] + 1 -del language['index'] -del language['ignore'] -del language['French name of Language'] -del language['ISO 639-1 Code'] -language - - -# In[6]: - - -# renommer les colonnes -language = language.rename(columns={'ISO 639-2 Code' : 'iso_code', 'English name of Language' : 'name'}) - - -# In[7]: - - -language - - -# In[8]: - - -# corriger la valeur trop longue qaa-qtz -language.loc[language['iso_code'] == 'qaa-qtz', 'iso_code'] = 'qaa' - - -# In[9]: - - -# ajout de la valeur UNKNOWN -language = language.append({'id' : 999999, 'iso_code' : '___', 'name' : 'UNKNOWN'}, ignore_index=True) -language - - -# In[10]: - - -# esport JSON -result = language.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/language.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[11]: - - -# export csv -language.to_csv('language.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[12]: - - -# export csv -language.to_csv('sample/language.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[13]: - - -# export excel 
-language.to_excel('sample/language.xlsx', index=False) - diff --git a/import_scripts/03_oacct_journals.md b/import_scripts/03_oacct_journals.md deleted file mode 100644 index b47e42df..00000000 --- a/import_scripts/03_oacct_journals.md +++ /dev/null @@ -1,17070 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Extraction des données des revues - - -## Corpus initial - -ISSNs des revues des publication archivées sur l'AoU UNIGE et sur Infoscience EPFL - -* Fichier des ISSNs de l'AoU exporté le 16.10.2020 -* Fichier des ISSNs de Infoscience exporté le 28.01.2021 -* Données extraits à partir du JSON de ISSN.org - - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# paramètre pour le nombre de journaux dans le sample (0 pour prendre tout) -journals_sample_n = 1000 -``` - -## Table OA categories - -* 1 : UNKNOWN -* 2 : Green -* 3 : Hybrid -* 4 : Full -* 5 : Gold -* 6 : Diamond - - -```python -# creation du DF -col_names = ['id', - 'status', - 'description', - 'subscription', - 'accepted_manuscript', - 'apc', - 'final_version' - ] -oas = pd.DataFrame(columns = col_names) -oas -``` - - - - -
- - - - - - - - - - - - - - - - -
idstatusdescriptionsubscriptionaccepted_manuscriptapcfinal_version
-
- - - - -```python -# ajout des valeurs -oas = oas.append({'id' : 1, 'status' : 'UNKNOWN', 'description' : '', 'subscription' : 0, 'accepted_manuscript' : 0, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 2, 'status' : 'Green', 'description' : 'Paywalled access journal, usually allows the archive of submitted or accepted version on institutional repositories (embargo periods may apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 3, 'status' : 'hybrid', 'description' : 'Paywalled access journal, offers several Open Access upon payment of APCs. It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -# oas = oas.append({'id' : 4, 'status' : 'Full', 'description' : 'No subscription, Green or Gold', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 5, 'status' : 'Gold', 'description' : 'Open Access journal (payment of APCs may apply). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 6, 'status' : 'Diamond', 'description' : 'Open Access journal (without payment of APCs). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -``` - - -```python -oas -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idstatusdescriptionsubscriptionaccepted_manuscriptapcfinal_version
01UNKNOWN0000
12GreenPaywalled access journal, usually allows the a...1100
23hybridPaywalled access journal, offers several Open ...1111
35GoldOpen Access journal (payment of APCs may apply...0111
46DiamondOpen Access journal (without payment of APCs)....0101
-
- - - - -```python -# esport JSON -result = oas.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/oa.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -oas.to_csv('sample/oa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -oas.to_excel('sample/oa.xlsx', index=False) -``` - -## Table Journals - - -```python -issns = pd.read_csv('issn/issns_count.tsv', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issncount_unigecount_epflcount
01660-93791654.02.01656.0
10031-9007602.0678.01280.0
21932-6203608.0340.0948.0
32174-8454732.00.0732.0
41098-0121334.0393.0727.0
...............
135931471-01531.00.01.0
135942257-52941.00.01.0
135950950-92401.00.01.0
135961868-18831.00.01.0
135971063-68890.01.01.0
-

13598 rows × 4 columns

-
- - - - -```python -# ajout des colonnes -issns.insert(0, 'id', '', False) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
01660-93791654.02.01656.0
10031-9007602.0678.01280.0
21932-6203608.0340.0948.0
32174-8454732.00.0732.0
41098-0121334.0393.0727.0
..................
135931471-01531.00.01.0
135942257-52941.00.01.0
135950950-92401.00.01.0
135961868-18831.00.01.0
135971063-68890.01.01.0
-

13598 rows × 5 columns

-
- - - - -```python -# convertir l'index en id -issns = issns.reset_index() -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexidissncount_unigecount_epflcount
001660-93791654.02.01656.0
110031-9007602.0678.01280.0
221932-6203608.0340.0948.0
332174-8454732.00.0732.0
441098-0121334.0393.0727.0
.....................
13593135931471-01531.00.01.0
13594135942257-52941.00.01.0
13595135950950-92401.00.01.0
13596135961868-18831.00.01.0
13597135971063-68890.01.01.0
-

13598 rows × 6 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -issns['id'] = issns['index'] + 1 -del issns['index'] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
011660-93791654.02.01656.0
120031-9007602.0678.01280.0
231932-6203608.0340.0948.0
342174-8454732.00.0732.0
451098-0121334.0393.0727.0
..................
13593135941471-01531.00.01.0
13594135952257-52941.00.01.0
13595135960950-92401.00.01.0
13596135971868-18831.00.01.0
13597135981063-68890.01.01.0
-

13598 rows × 5 columns

-
- - - - -```python -# reduction à X journaux pour l'échantillon de test -if journals_sample_n > 0 : - issns = issns.loc[:journals_sample_n] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
011660-93791654.02.01656.0
120031-9007602.0678.01280.0
231932-6203608.0340.0948.0
342174-8454732.00.0732.0
451098-0121334.0393.0727.0
..................
9969970964-17261.020.021.0
9979980022-346821.00.021.0
9989991432-206417.04.021.0
99910000960-14815.016.021.0
100010010161-756721.00.021.0
-

1001 rows × 5 columns

-
- - - - -```python -# ajout des ISSN-L -df_issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -df_issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -df_issnl = df_issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -``` - - -```python -issns = pd.merge(issns, df_issnl, on='issn', how='left') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcountissnl
011660-93791654.02.01656.01660-9379
120031-9007602.0678.01280.00031-9007
231932-6203608.0340.0948.01932-6203
342174-8454732.00.0732.02174-8454
451098-0121334.0393.0727.01098-0121
.....................
9969970964-17261.020.021.00964-1726
9979980022-346821.00.021.00022-3468
9989991432-206417.04.021.00178-8051
99910000960-14815.016.021.00960-1481
100010010161-756721.00.021.00161-7567
-

1001 rows × 6 columns

-
- - - - -```python -# creation du DF -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'issn', - 'issnl', - 'title', - 'starting_year', - 'end_year', - 'url', - 'name_short_iso_4' - ] -journals = pd.DataFrame(columns = col_names) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
-
- - - - -```python -# creation du DF -col_names = ['id', 'iso_code'] -journals_languages = pd.DataFrame(columns = col_names) -journals_languages -``` - - - - -
- - - - - - - - - - - -
idiso_code
-
- - - - -```python -# creation du DF -# 'oa_status' supprimé -col_names = ['id', 'iso_code'] -journals_countries = pd.DataFrame(columns = col_names) -journals_countries -``` - - - - -
- - - - - - - - - - - -
idiso_code
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in issns.iterrows(): - myid = row['id'] - myissn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - issnl = np.nan - title = '' - keytitle = '' - starting_year = np.nan - end_year = np.nan - myurl = np.nan - journal_country = np.nan - journal_language = np.nan - keytitle_abbr = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('mainTitle' in x): - title = x['mainTitle'] - else : - if ('name' in x): - title = x['name'] - # print(myissn) - if ('startDate' in x): - starting_year = x['startDate'] - if ('endDate' in x): - end_year = x['endDate'] - if ('url' in x): - urls = x['url'] - if type(urls) is list: - for url in urls: - # Filtrer les URLs des archives : - # www.ncbi.nlm.nih.gov/pmc/* - # www.pubmedcentral.gov/* - # pubmedcentral.nih.gov/* - # bibpurl.oclc.org/* - # www.jstor.org/* - # ieeexplore.ieee.org - # ovidsp.ovid.com - # et garder le premier des restants - myurl = url - if ('ncbi.nlm.nih.gov' not in url - and 'pubmedcentral' not in url - and 'bibpurl.oclc.org' not in url - and 'jstor.org' not in url - and 'ieeexplore.ieee.org' not in url - and 'ovidsp.ovid.com' not in url): - break - else : - myurl = x['url'] - if ('spatial' in x): - countries = x['spatial'] - if type(countries) is list: - for country in countries: - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in country): - journal_country = country[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - else : - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in countries): - journal_country = countries[-2:] - journals_countries = 
journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - # langue "inLanguage": "http://id.loc.gov/vocabulary/iso639-2/eng", - if ('inLanguage' in x): - languages = x['inLanguage'] - if type(languages) is list: - for language in languages: - journal_language = language[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - else : - journal_language = languages[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - if (x['@id'] == 'resource/ISSN/' + myissn + '#KeyTitle'): - if ('value' in x): - keytitle = x['value'] - if (x['@id'] == 'resource/ISSN/' + myissn + '#ISSN-L'): - if ('value' in x): - issnl = x['value'] - # "@id": "resource/ISSN/1098-0121#AbbreviatedKeyTitle", - if (x['@id'] == 'resource/ISSN/' + myissn + '#AbbreviatedKeyTitle'): - if ('value' in x): - mykeytitle_abbrs = x['value'] - if type(mykeytitle_abbrs) is list: - for mykeytitle_abbr in mykeytitle_abbrs: - print(myissn + ' - AbbreviatedKeyTitle is a list ' + mykeytitle_abbr) - keytitle_abbr = mykeytitle_abbr - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' AbbreviatedKeyTitle is a list ' + mykeytitle_abbr + '\n') - break - else : - keytitle_abbr = mykeytitle_abbrs - if keytitle != '' : - title = keytitle - if title != '' : - # supprimer le point à la fin - if (title[-1] == '.'): - title = title[0:-1] - # remplacer les caractères spéciaux ˜The œ - if type(title) is list: - for mytitlei in title: - print(myissn + ' - title is a list ' + mytitlei) - title = str.replace(mytitlei, '˜The œ', 'The ') - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' title is a list ' + mytitlei + '\n') - break - else : - title = str.replace(title, '˜The œ', 'The ') - else : - print(row['issn'] + ' - not found') - with 
open('sample/03_journals_issn_errors.txt', 'a', encoding='utf-8') as g: - g.write(row['issn'] + ' not found \n') - journals.at[index,'id'] = myid - journals.at[index,'title'] = title - journals.at[index,'issn'] = myissn - journals.at[index,'issnl'] = issnl - journals.at[index,'starting_year'] = starting_year - journals.at[index,'end_year'] = end_year - journals.at[index,'url'] = myurl - journals.at[index,'name_short_iso_4'] = keytitle_abbr -``` - - 0 - 10 - 1094-4087 - AbbreviatedKeyTitle is a list Opt Express - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 0899-823X - AbbreviatedKeyTitle is a list Infect. control hosp. epidemiol. - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 0370-693 - not found - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 1544-9173 - AbbreviatedKeyTitle is a list PLoS Biol - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 0277-86X - not found - 630 - 640 - 650 - 0003-951 - not found - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 1931-3128 - AbbreviatedKeyTitle is a list Cell Host Microbe - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - - - -```python -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN
342174-84542174-8454EU-topías20119999NaNEU-topías
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.
...........................
9969970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)
9979980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)
9989991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)
99910000960-14810960-1481Renewable energy19919999NaNRenew. energy
100010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...
-

1001 rows × 8 columns

-
- - - - -```python -# titres vides -journals.loc[journals['title'] == ''] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
3093100370-693NaNNaNNaNNaNNaN
3613620777-5466NaN||||||||NaNNaN
6296300277-86XNaNNaNNaNNaNNaN
6566570003-951NaNNaNNaNNaNNaN
8408411089-5647NaNNaNNaNNaNNaN
-
- - - - -```python -# export csv des titres vides -journals.loc[journals['title'] == ''].to_csv('sample/journals_sans_titre.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des ids -journals.loc[journals['title'] == ''].to_excel('sample/journals_sans_titre.xlsx', index=False) -``` - - -```python -# garder les lignes avec titre -journals = journals.loc[journals['title'] != ''] -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN
342174-84542174-8454EU-topías20119999NaNEU-topías
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.
...........................
9969970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)
9979980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)
9989991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)
99910000960-14810960-1481Renewable energy19919999NaNRenew. energy
100010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...
-

996 rows × 8 columns

-
- - - - -```python -journals.shape[0] -``` - - - - - 996 - - - -## Languages - - -```python -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_code
01fre
12eng
23eng
34eng
44fre
.........
1117997eng
1118998eng
1119999eng
11201000eng
11211001eng
-

1122 rows × 2 columns

-
- - - - -```python -# load the language reference table (tab-separated, first row is the header) -languages = pd.read_csv('sample/language.tsv', encoding='utf-8', header=0, sep='\t') -languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
487___UNKNOWN999999
-

488 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -del languages['name'] -languages = languages.rename(columns={'id' : 'language'}) -``` - - -```python -# merge avec languages -journals_languages = pd.merge(journals_languages, languages, on='iso_code', how='left') -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_codelanguage
01fre138
12eng124
23eng124
34eng124
44fre138
............
1117997eng124
1118998eng124
1119999eng124
11201000eng124
11211001eng124
-

1122 rows × 3 columns

-
- - - - -```python -# concat valeurs avec même id -journals_languages['language'] = journals_languages['language'].astype(str) -journals_languages = journals_languages.groupby('id').agg({'language': lambda x: ', '.join(x)}) -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
language
id
1138
2124
3124
4124, 138, 402, 292
5124
......
997124
998124
999124
1000124
1001124
-

996 rows × 1 columns

-
- - - - -```python -# recuperation de l'id des langues -journals = pd.merge(journals, journals_languages, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4language
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124
..............................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)124
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124
-

996 rows × 9 columns

-
- - - -## Countries - - -```python -# journal id / ISO country code pairs -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_code
01CH
12US
23US
34ES
45US
.........
992997GB
993998US
994999DE
9951000GB
9961001US
-

997 rows × 2 columns

-
- - - - -```python -# ouvrir la table des pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -del country['name'] -country = country.rename(columns={'id' : 'country'}) -``` - - -```python -# merge avec countries -journals_countries = pd.merge(journals_countries, country, on='iso_code', how='left') -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_codecountry
01CH215
12US236
23US236
34ES209
45US236
............
992997GB234
993998US236
994999DE83
9951000GB234
9961001US236
-

997 rows × 3 columns

-
- - - - -```python -# concat valeurs avec même id -journals_countries['country'] = journals_countries['country'].astype(str) -journals_countries = journals_countries.groupby('id').agg({'country': lambda x: ', '.join(x)}) -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
country
id
1215
2236
3236
4209
5236
......
997234
998236
99983
1000234
1001236
-

997 rows × 1 columns

-
- - - - -```python -# recuperation de l'id des langues -journals = pd.merge(journals, journals_countries, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236
.................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236
-

996 rows × 10 columns

-
- - - -### DOAJ - - -```python -# load the DOAJ journal metadata export (CSV, first row is the header) -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...URL for journal's Open Access statementContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Added
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...http://www.scielo.br/revistas/aabc/isubscrp.htmNaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...http://riviste.unimi.it/index.php/ACME/about/e...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...https://www.medicaljournals.se/acta/open-acces...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...http://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...https://pbsociety.org.pl/journals/index.php/am...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...https://spj.sciencemag.org/bmef/about/NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...https://misinforeview.hks.harvard.edu/editoria...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...https://journal.ohrm.bba.md/index.php/journal-...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...https://hrmj.ihu.ac.ir/?lang=enNaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...http://tsunamisociety.org/AboutUs.htmlNaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z
-

16029 rows × 53 columns

-
- - - - -```python -# ajout ISSNL -doaj['issn'] = doaj['Journal ISSN (print version)'] -doaj.loc[doaj['issn'].isna(), 'issn'] = doaj['Journal EISSN (online version)'] -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...ContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Addedissn
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...NaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z0001-3765
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z0001-494X
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z0001-5555
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z0001-6012
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z0001-625X
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z2765-8031
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN2766-1652
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z2887-3458
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...NaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN8254-8002
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...NaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z8755-6839
-

16029 rows × 54 columns

-
- - - - -```python -doaj = pd.merge(doaj, df_issnl, on='issn', how='left') -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...Continued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Addedissnissnl
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...NaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z0001-37650001-3765
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...NaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z0001-494X0001-494X
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...NaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z0001-55550001-5555
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...NaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z0001-60120001-6012
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...NaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z0001-625X0001-625X
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...NaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z2765-8031NaN
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...NaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN2766-1652NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...NaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z2887-3458NaN
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...NaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN8254-8002NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...NaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z8755-68398755-6839
-

16029 rows × 55 columns

-
- - - - -```python -doaj.columns -``` - - - - - Index(['Journal title', 'Journal URL', 'URL in DOAJ', 'Alternative title', - 'Journal ISSN (print version)', 'Journal EISSN (online version)', - 'Keywords', 'Languages in which the journal accepts manuscripts', - 'Publisher', 'Country of publisher', 'Society or institution', - 'Country of society or institution', 'Journal license', - 'License attributes', 'URL for license terms', - 'Machine-readable CC licensing information embedded or displayed in articles', - 'URL to an example page with embedded licensing information', - 'Author holds copyright without restrictions', - 'Copyright information URL', 'Review process', - 'Review process information URL', 'Journal plagiarism screening policy', - 'Plagiarism information URL', 'URL for journal's aims & scope', - 'URL for the Editorial Board page', - 'URL for journal's instructions for authors', - 'Average number of weeks between article submission and publication', - 'APC', 'APC information URL', 'APC amount', - 'Journal waiver policy (for developing country authors etc)', - 'Waiver policy information URL', 'Has other fees', - 'Other submission fees information URL', 'Preservation Services', - 'Preservation Service: national library', - 'Preservation information URL', 'Deposit policy directory', - 'URL for deposit policy', 'Persistent article identifiers', - 'Article metadata includes ORCIDs', - 'Journal complies with I4OC standards for open citations', - 'Does this journal allow unrestricted reuse in compliance with BOAI?', - 'URL for journal's Open Access statement', 'Continues', 'Continued By', - 'LCC Codes', 'Subjects', 'DOAJ Seal', 'Added on Date', - 'Last updated Date', 'Number of Article Records', - 'Most Recent Article Added', 'issn', 'issnl'], - dtype='object') - - - - -```python -doaj['Preservation Services'] -``` - - - - - 0 NaN - 1 NaN - 2 NaN - 3 PKP PN - 4 NaN - ... 
- 16024 NaN - 16025 NaN - 16026 NaN - 16027 NaN - 16028 NaN - Name: Preservation Services, Length: 16029, dtype: object - - - - -```python -doaj['DOAJ Seal'] -``` - - - - - 0 No - 1 No - 2 No - 3 No - 4 No - .. - 16024 No - 16025 No - 16026 No - 16027 No - 16028 No - Name: DOAJ Seal, Length: 16029, dtype: object - - - - -```python -doaj['issnl'] -``` - - - - - 0 0001-3765 - 1 0001-494X - 2 0001-5555 - 3 0001-6012 - 4 0001-625X - ... - 16024 NaN - 16025 NaN - 16026 NaN - 16027 NaN - 16028 8755-6839 - Name: issnl, Length: 16029, dtype: object - - - - -```python -doaj['APC'].value_counts() -``` - - - - - No 11567 - Yes 4462 - Name: APC, dtype: int64 - - - - -```python -# DOAJ columns to bring into journals: -# issnl (key used for the merge with journals) -# Journal title -# DOAJ Seal -# APC -doaj_for_merge = doaj[['issnl', 'Journal title', 'DOAJ Seal', 'APC']] -doaj_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlJournal titleDOAJ SealAPC
00001-3765Anais da Academia Brasileira de CiênciasNoNo
10001-494XACMENoNo
20001-5555Acta Dermato-VenereologicaNoYes
30001-6012Acta Médica CostarricenseNoNo
40001-625XActa MycologicaNoYes
...............
16024NaNBME FrontiersNoNo
16025NaNHarvard Kennedy School Misinformation ReviewNoNo
16026NaNOne Health & Risk ManagementNoNo
16027NaNفصلنامه پژوهش‌های مدیریت منابع انسانیNoNo
160288755-6839Science of Tsunami HazardsNoNo
-

16029 rows × 4 columns

-
- - - - -```python -# renommer les colonnes -doaj_for_merge = doaj_for_merge.rename(columns={'Journal title' : 'doaj_title', 'DOAJ Seal' : 'doaj_seal'}) -doaj_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnldoaj_titledoaj_sealAPC
00001-3765Anais da Academia Brasileira de CiênciasNoNo
10001-494XACMENoNo
20001-5555Acta Dermato-VenereologicaNoYes
30001-6012Acta Médica CostarricenseNoNo
40001-625XActa MycologicaNoYes
...............
16024NaNBME FrontiersNoNo
16025NaNHarvard Kennedy School Misinformation ReviewNoNo
16026NaNOne Health & Risk ManagementNoNo
16027NaNفصلنامه پژوهش‌های مدیریت منابع انسانیNoNo
160288755-6839Science of Tsunami HazardsNoNo
-

16029 rows × 4 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, doaj_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPC
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONEYesYes
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN
..........................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN
-

996 rows × 13 columns

-
- - - - -```python -# flag DOAJ presence and recode the DOAJ seal -# doaj_status: 0 when doaj_title is missing (no DOAJ record matched), 1 otherwise -# doaj_seal: 'Yes' becomes 1 and 'No' becomes 0; any other value (e.g. NaN for journals without a DOAJ record) is left as is -journals.loc[journals['doaj_title'].isna(), 'doaj_status'] = 0 -journals.loc[~journals['doaj_title'].isna(), 'doaj_status'] = 1 -journals.loc[journals['doaj_seal'] == 'Yes', 'doaj_seal'] = 1 -journals.loc[journals['doaj_seal'] == 'No', 'doaj_seal'] = 0 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0
.............................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0
-

996 rows × 14 columns

-
- - - -### LOCKSS - - -```python -# ajout des infos de preservation LOCKSS, Portico et Licences Nationales -lockss = pd.read_csv('lockss/keepers-LOCKSS-report.csv', encoding='utf-8', header=0, skiprows=1) -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Years
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN2020
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN
...........................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN12020
-

14993 rows × 8 columns

-
- - - - -```python -# ajout ISSNL -lockss['issn'] = lockss['eISSN'] -lockss.loc[lockss['eISSN'].isna(), 'issn'] = lockss['ISSN'] -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissn
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN1551-7012
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN20202164-9731
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN1939-0343
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN2327-9214
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN1533-6247
..............................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN1939-7941
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN0289-0003
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN2224-073X
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN2050-084X
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217X
-

14993 rows × 9 columns

-
- - - - -```python -lockss = pd.merge(lockss, df_issnl, on='issn', how='left') -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN1551-70121551-7004
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN20202164-97312166-4072
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN1939-03431939-0343
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN2327-92142327-9214
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN1533-62470003-1615
.................................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN1939-79411939-7941
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN0289-00030289-0003
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN2224-073X1562-7020
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN2050-084X2050-084X
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217XNaN
-

14993 rows × 10 columns

-
- - - - -```python -lockss.columns -``` - - - - - Index(['Publisher', 'Title', 'ISSN', 'eISSN', 'Preserved Volumes', - 'Preserved Years', 'In Progress Volumes', 'In Progress Years', 'issn', - 'issnl'], - dtype='object') - - - - -```python -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
5Academy of ManagementAcademy of Management Discoveries (AMD)NaN2168-10071; 2; 32015; 2016; 2017NaNNaN2168-1007NaN
28Alliance of Crop, Soil, and Environmental Scie...Soil HorizonsNaN2163-281250; 51; 52; 53; 54; 55; 562009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN2163-2812NaN
131American Institute of Aeronautics and Astronau...Air Traffic Control Quarterly1064-38182472-57571; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15...1993; 1995; 1996; 1997; 1998; 1999; 2000; 2001...219942472-5757NaN
134American Institute of Aeronautics and Astronau...Journal of Air TransportationNaN2380-945024; 25; 26; 272016; 2017; 2018; 20192820202380-9450NaN
192American Psychiatric Association PublishingPsychiatric Research and Clinical PracticeNaN2575-560912019220202575-5609NaN
.................................
14900Utrecht University LibraryEarly Modern Low CountriesNaN2543-1587NaNNaN1; 2; 3; 42017; 2018; 2019; 20202543-1587NaN
14968White Rose University PressBritish and Irish Orthoptic JournalNaN2516-35906; 7; 8; 9; 10; 11; 12; 13; 14; 162009; 2010; 2011; 2012; 2013; 2014; 2015; 2016...1720202516-3590NaN
14970White Rose University PressUndergraduate Journal of Politics and Internat...NaN2398-59921; 22018; 2019NaNNaN2398-5992NaN
14985World Haiku ClubWorld Haiku ReviewNaNNaN1; 2; 32001; 2002; 2003NaNNaNNaNNaN
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217XNaN
-

835 rows × 10 columns

-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -lockss.loc[lockss['issnl'].isna(), 'issnl'] = lockss['issn'] -``` - - -```python -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
317Association des Amis des CryptogamesCryptogamie, AlgologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
318Association des Amis des CryptogamesCryptogamie, BryologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
319Association des Amis des CryptogamesCryptogamie, MycologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
850Boston College LibrariesFresh Ink: Essays From Boston College's First-...NaNNaN12; 13; 13; 92009; 2010; 2011; 2007NaNNaNNaNNaN
1681Exquisite CorpseExquisite CorpseNaNNaNNaN1999NaNNaNNaNNaN
2032Georgia Southern UniversityIrish Studies SouthNaNNaN12014NaNNaNNaNNaN
2039Georgia Southern UniversityThe Journal of Student Success in WritingNaNNaN12017NaNNaNNaNNaN
3526LOCKSS ProgramLOCKSS CardNaNNaNNaN2005; 2006; 2006; 2006NaNNaNNaNNaN
4721Oxford University PressInternational Immunology Meeting AbstractsNaNNaNNaNNaNNaNNaNNaNNaN
6725Sagamore PublishingJournal of Facility Planning, Design, and Mana...NaNNaN1; 2; 3; 42013; 2014; 2015; 2016NaNNaNNaNNaN
10718State of AlaskaAlaska State DocumentsNaNNaNNaN2005; 2005; 2006; 2006; 2007; 2007; 2008; 2008...NaNNaNNaNNaN
14985World Haiku ClubWorld Haiku ReviewNaNNaN1; 2; 32001; 2002; 2003NaNNaNNaNNaN
-
- - - - -```python -# ajout des infos de LOCKSS : -# Title -lockss_for_merge = lockss[['issnl', 'Title']] -lockss_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlTitle
01551-7004ARKIVOC
12166-4072Ab Imperio
21939-0343Absinthe Literary Review
32327-9214eGEMs
40003-1615The Americas
.........
149881939-7941Journal of Jewish Identities
149890289-0003Zoological Science
149901562-7020African Zoology
149912050-084XeLife
149922698-217XSteiner Studies
-

14993 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -lockss_for_merge = lockss_for_merge.rename(columns={'Title' : 'lockss_title'}) -lockss_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnllockss_title
01551-7004ARKIVOC
12166-4072Ab Imperio
21939-0343Absinthe Literary Review
32327-9214eGEMs
40003-1615The Americas
.........
149881939-7941Journal of Jewish Identities
149890289-0003Zoological Science
149901562-7020African Zoology
149912050-084XeLife
149922698-217XSteiner Studies
-

14993 rows × 2 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, lockss_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN
................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN
-

1005 rows × 15 columns

-
- - - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN
................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN
-

996 rows × 15 columns

-
- - - - -```python -# ajouter info sur la presence sur LOCKSS -journals.loc[journals['lockss_title'].isna(), 'lockss'] = 0 -journals.loc[~journals['lockss_title'].isna(), 'lockss'] = 1 -journals -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexing.py:376: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - self.obj[key] = _infer_fill_value(value) - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexing.py:494: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - self.obj[item] = s - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockss
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0
...................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0
-

996 rows × 16 columns

-
- - - -### Portico - - -```python -# ajout des infos de preservation Portico -portico = pd.read_excel('portico/e-journals.xlsx', sheet_name='Details', skiprows=2) -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 13Unnamed: 14Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
-

35555 rows × 23 columns

-
- - - - -```python -# ajout ISSNL -portico['issn'] = portico['e-ISSN'] -portico.loc[portico['e-ISSN'].isna(), 'issn'] = portico['Print ISSN'] -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 14Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issn
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaNNaN2374-1406
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaNNaN2369-6923
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaNNaN1929-6371
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaNNaN1090-6517
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaNNaN0899-8213
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2409-9694
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2410-4701
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2409-9848
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2410-2199
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaNNaN1606-0997
-

35555 rows × 24 columns

-
- - - - -```python -portico = pd.merge(portico, df_issnl, on='issn', how='left') -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaN2374-14062374-1406
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaN2369-6923NaN
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaN1929-63711929-8056
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaN1090-65171063-777X
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaN0899-82130899-8213
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2409-96942409-9694
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2410-47012410-4701
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2409-98482409-9848
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2410-21992410-2199
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaN1606-09971606-0997
-

35555 rows × 25 columns

-
- - - - -```python -portico.columns -``` - - - - - Index(['Publisher', 'Title', 'Society', 'Print ISSN', 'e-ISSN', 'PCA', - 'Status', 'Years', 'ContentSet Id', 'Holdings', 'Unnamed: 10', - 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', - 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', - 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'issn', - 'issnl'], - dtype='object') - - - - -```python -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaN2369-6923NaN
9AIP PublishingAPL BioengineeringNaNNaN2473-2877Yespreserved2017-2021ISSN_2473428772017 - v. 1 (1), 2018 - v. 2 (1-4), 2019 - v. ......NaNNaNNaNNaNNaNNaNNaNNaN2473-2877NaN
14AIP PublishingBiophysics ReviewsNaNNaN2688-4089Yespreserved2020-2021ISSN_26884089_152020 - v. 1 (1), 2021 - v. 2 (1)...NaNNaNNaNNaNNaNNaNNaNNaN2688-4089NaN
16AIP PublishingJournal of Undergraduate Reports in PhysicsNaNNaN2642-7451Yespreserved2018-2020ISSN_26427451_152018 - v. 28 (1), 2019 - v. 29 (1), 2020 - v. ......NaNNaNNaNNaNNaNNaNNaNNaN2642-7451NaN
20AIP PublishingNanotechnology and Precision EngineeringNaN1672-60302589-5540NaNpreserved2018-2021ISSN_16726030_152018 - v. 1 (1-4), 2019 - v. 2 (1-4), 2020 - v......NaNNaNNaNNaNNaNNaNNaNNaN2589-5540NaN
..................................................................
35539World ScientificDivision of Labor & Transaction CostsNaN0219-87111793-7000Nopreserved2005-2011ISSN_021987112005/2006 - v. 1 (1-2), 2006/2007 - v. 2 (1-2)......NaNNaNNaNNaNNaNNaNNaNNaN1793-7000NaN
35540World ScientificJournal of Medical Robotics ResearchNaN2424-905X2424-9068Nopreserved2016-2020ISSN_2424905X2016 - v. 1 (1-4), 2017 - v. 2 (1-4), 2018 - v......NaNNaNNaNNaNNaNNaNNaNNaN2424-9068NaN
35541World ScientificInternational Journal of Foundations of Comput...NaN0129-05411793-6373Nopreserved1990-2021ISSN_012905411990 - v. 1 (1-4), 1991 - v. 2 (1-4), 1992 - v......NaNNaNNaNNaNNaNNaNNaNNaN1793-6373NaN
35542World ScientificMolecular Frontiers JournalNaN2529-73252529-7333Nopreserved2017-2020ISSN_252973252017 - v. 1 (1-2, null), 2018 - v. 2 (1), 2019......NaNNaNNaNNaNNaNNaNNaNNaN2529-7333NaN
35543World ScientificWater Economics and PolicyNaN2382-624X2382-6258Nopreserved2015-2020ISSN_2382624X2015 - v. 1 (1-4), 2016 - v. 2 (1-4), 2017 - v......NaNNaNNaNNaNNaNNaNNaNNaN2382-6258NaN
-

4086 rows × 25 columns

-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -portico.loc[portico['issnl'].isna(), 'issnl'] = portico['issn'] -``` - - -```python -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
41ASTM InternationalASTM StandardsNaNNaNNaNYesqueued-ASTM Standards-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
58Academic JournalsInternational Journal of Vocational and Techni...NaNNaNNaNNaNqueued-ISSN_TBD70-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
78Academic JournalsJournal of Metabolomics and Systems BiologyNaNNaNNaNNaNqueued-ISSN_TBD68-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
180Academy of ResearchThe Microfinance JournalNaNNaNNaNNaNqueued-TBD_MJ_1242-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
254African Online Scientific Information Systems ...Journal of African ForesightNaNNaNNaNNaNqueued-ISSN_TBD288-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
34911Wolters Kluwer HealthAJSP OpenNaNNaNNaNYesqueued-TBD_74_1-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
34915Wolters Kluwer HealthAnnals of Surgery OANaNNaNNaNYesqueued-TBD_74_2-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35047Wolters Kluwer HealthOtology & Neurotology OpenNaNNaNNaNYesqueued-TBD_ONO_74-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35058Wolters Kluwer HealthNorthwest Journal of OptometryNaNNaNNaNYespreserved1924-1925NJO_74v.1(1-12),v.2(1-7)...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35209Wolters Kluwer HealthOccupational Therapy & RehabilitationNaNNaNNaNYespreserved1925-1951OTR_74v.22(1-6),v.23(1-6),v.24(1-6),v.25(1-6),v.26(1......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
-

300 rows × 25 columns

-
# Portico information to add to the journals table:
# keep only the issnl key and the Status column.
portico_for_merge = portico.loc[:, ['issnl', 'Status']]
portico_for_merge
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlStatus
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355502409-9694queued
355512410-4701queued
355522409-9848queued
355532410-2199queued
355541606-0997preserved
-

35555 rows × 2 columns

-
# Keep only the rows whose Status is exactly "preserved".
portico_for_merge = portico_for_merge[portico_for_merge['Status'].eq('preserved')]
portico_for_merge
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlStatus
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355462572-5505preserved
355472225-0719preserved
355482472-0712preserved
355492377-231Xpreserved
355541606-0997preserved
-

33177 rows × 2 columns

-
# Rename the Status column to portico_status.
portico_for_merge = portico_for_merge.rename({'Status': 'portico_status'}, axis='columns')
portico_for_merge
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlportico_status
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355462572-5505preserved
355472225-0719preserved
355482472-0712preserved
355492377-231Xpreserved
355541606-0997preserved
-

33177 rows × 2 columns

-
# Left join of the Portico status onto journals, keyed on issnl:
# every journals row is kept, unmatched rows get NaN in portico_status.
journals = journals.merge(portico_for_merge, how='left', on='issnl')
journals
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved
......................................................
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
10799991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN
-

1082 rows × 17 columns

-
# Remove duplicates: keep the first row for each journal id.
journals = journals.drop_duplicates(subset='id', keep='first')
journals
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved
......................................................
10769970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN
-

996 rows × 17 columns

-
# Presence flag for Portico: 1.0 when portico_status is set, 0.0 when it is NaN.
journals['portico'] = journals['portico_status'].notna().astype(float)
journals
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusportico
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0
.........................................................
10769970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0
-

996 rows × 18 columns

-
# ### Licences Nationales

# National Licences preservation data: load the Cambridge spreadsheet.
nlch1 = pd.read_excel(io='licences_nationales/cambridge_Switzerland_NationalLicences_2020-08-17.xlsx')
nlch1
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0Journal of Agricultural and Applied Economics1074-0708NaN19691.0NaN201547.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
1Advances in Applied Mathematics and Mechanics2070-07332075-135420113.0NaN20158.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
2Annals of Actuarial Science1748-49951748-500220061.0NaN20159.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
3Advances in Animal Biosciences2040-47002040-471920101.0NaN20156.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
4Archaeologia0261-3409NaN17701.0NaN1992110.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
389Zygote0967-19941469-873019931.0NaN201523.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
390Political Analysis1047-19871476-498919891.0NaN201523.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
391Business and Politics1369-52581469-356919991.0NaN201517.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
392Transactions of the Institute of Actuaries2047-28382398-738318491.0NaN18521.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
393Transactions of the International Astronomical...NaN0251-107X19221.01.0200725.02.0https://www.cambridge.org/core/journals/procee......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
-

394 rows × 25 columns

-
# National Licences preservation data: load the De Gruyter spreadsheet.
nlch2 = pd.read_excel(io='licences_nationales/gruyter_Switzerland_NationalLicences_2020-11-30.xlsx')
nlch2
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0ABI Technik0720-67632191-4664199616NaN201737.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
1Accounting, Economics, and Law: A Convivium2194-60512152-282020111NaN20177.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
2Advanced Optical Technologies2192-85762192-858420121NaN20176.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
3Advances in Calculus of Variations1864-82581864-826620081NaN201710.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
4Advances in Geometry1615-715X1615-716820011NaN201717.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
..................................................................
339Zeitschrift für Religionswissenschaft0943-86102194-508X19931NaN201725.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
340Zeitschrift für romanische Philologie0049-86611865-906318771NaN2017133.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
341Zeitschrift für Slawistik0044-35062196-701619561NaN201762.0NaNhttps://www.degruyter.com/openurl?genre=journa......De Gruyter (A)serialNaNNaNNaNNaNNaNNaNNaNP
342Zeitschrift für Sprachwissenschaft0721-90671613-370619821NaN201736.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNF
343Zeitschrift für Unternehmens- und Gesellschaft...0340-24791612-704819721NaN201746.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
-

344 rows × 25 columns

-
# National Licences preservation data: load the Oxford spreadsheet.
nlch3 = pd.read_excel(io='licences_nationales/oxford_Switzerland_NationalLicences_2020-09-24.xlsx')
nlch3
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0Acta Biochimica et Biophysica Sinica1672-91451745-7270201547.0NaN2018NaNNaNhttps://academic.oup.com/abbs...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
1Archives of Clinical Neuropsychology0887-61771873-584319861.0NaN2018NaNNaNhttps://academic.oup.com/acn...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
2Adaptation1755-06371755-064520158.0NaN2018NaNNaNhttps://academic.oup.com/adaptation...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
3American Entomologist1046-28212155-9902199036.0NaN2018NaNNaNhttps://academic.oup.com/ae...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
4Applied Economic Perspectives and Policy1058-71951467-935319881.0NaN2018NaNNaNhttps://academic.oup.com/aepp...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
343The Chinese Journal of Comparative Law2050-48022050-481020186.0NaN2018NaNNaNhttps://academic.oup.com/cjcl...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
344Journal of Nutrition0022-31661541-61002018148.0NaN2018NaNNaNhttps://academic.oup.com/jn...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
345Translational Behavioral Medicine1869-67161613-986020188.0NaN2018NaNNaNhttps://academic.oup.com/tbm...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
346The Western Historical Quarterly0043-38101939-8603201647.0NaN2018NaNNaNhttps://academic.oup.com/whq...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
347Zoological Journal of the Linnean Society0024-40821096-36422017179.0NaN2018NaNNaNhttps://academic.oup.com/zoolinnean...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
-

348 rows × 25 columns

-
# National Licences preservation data: load the Springer spreadsheet.
nlch4 = pd.read_excel(io='licences_nationales/springer_Switzerland_NationalLicences_2020-08-12.xlsx')
nlch4
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...coverage_notespublisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_id
04OR1619-45001614-241120053.01.02015NaNNaNhttp://link.springer.com/journal/10288...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1AAPS PharmSciTechNaN1530-993220056.01.02015NaNNaNhttp://link.springer.com/journal/12249...NaNSpringer USSerialNaNNaNNaNNaNNaNNaNNaN
2ADHD Attention Deficit and Hyperactivity Disor...1866-61161866-664720091.01.02014NaNNaNhttp://link.springer.com/journal/12402...NaNSpringer ViennaSerialNaNNaNNaNNaNNaNNaNNaN
3AI & SOCIETY0951-56661435-565519871.01.02015NaNNaNhttp://link.springer.com/journal/146...NaNSpringer LondonSerialNaNNaNNaNNaNNaNNaNNaN
4AIDS and Behavior1090-71651573-325420059.01.02015NaNNaNhttp://link.springer.com/journal/10461...NaNSpringer USSerialNaNNaNNaNNaNNaNNaNNaN
..................................................................
1667neurogenetics1364-67451364-675320056.01.02015NaNNaNhttp://link.springer.com/journal/10048...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1668uwf UmweltWirtschaftsForum | Sustainability Ma...0943-34811432-2293200715.01.02015NaNNaNhttp://link.springer.com/journal/550...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1669Österreichische Wasser- und Abfallwirtschaft0945-358X1613-7566200557.01.02015NaNNaNhttp://link.springer.com/journal/506...NaNSpringer ViennaSerialNaNNaNNaNNaNNaNNaNNaN
1670Österreichische Zeitschrift für Soziologie1011-00701862-2585200530.01.02015NaNNaNhttp://link.springer.com/journal/11614...NaNSpringer Fachmedien WiesbadenSerialNaNNaNNaNNaNNaNNaNNaN
1671Journal Applied Mathematics Computing1598-58651865-20851905NaNNaN1905NaNNaNhttp://link.springer.com/journal/12190...NaNSpringerSerialNaNNaNNaNNaNNaNNaNNaN
-

1672 rows × 24 columns

-
# Concatenate the four National Licences tables into one frame with a
# fresh 0..n-1 index.
# The four inputs do not have identical columns, so pandas has to align
# them. sort=True is passed explicitly: it keeps the alphabetically sorted
# column order this notebook was run with, silences the FutureWarning about
# the changing default, and makes the result independent of the installed
# pandas version.
nlch = pd.concat([nlch1, nlch2, nlch3, nlch4], ignore_index=True, sort=True)
nlch
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...num_last_vol_onlineonline_identifierparent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_url
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...47.0NaNNaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...8.02075-1354NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...9.01748-5002NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...6.02040-4719NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...110.0NaNNaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1364-6753NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/10048
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...NaN1432-2293NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/550
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1613-7566NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/506
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1862-2585NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/11614
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...NaN1865-2085NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/12190
-

2758 rows × 26 columns

-
- - - - -```python -nlch.columns -``` - - - - - Index(['access_type', 'coverage_depth', 'coverage_notes', - 'date_first_issue_online', 'date_last_issue_online', - 'date_monograph_published_online', 'date_monograph_published_print', - 'embargo_info', 'first_author', 'first_editor', 'monograph_edition', - 'monograph_volume', 'notes', 'num_first_issue_online', - 'num_first_vol_online', 'num_last_issue_online', 'num_last_vol_online', - 'online_identifier', 'parent_publication_title_id', - 'preceding_publication_title_id', 'print_identifier', - 'publication_title', 'publication_type', 'publisher_name', 'title_id', - 'title_url'], - dtype='object') - - - - -```python -# ajout ISSNL -nlch['issn'] = nlch['online_identifier'] -nlch.loc[nlch['online_identifier'].isna(), 'issn'] = nlch['print_identifier'] -nlch -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...online_identifierparent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissn
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...NaNNaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...1074-0708
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...2075-1354NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...2075-1354
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...1748-5002NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...1748-5002
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...2040-4719NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...2040-4719
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...NaNNaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...0261-3409
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1364-6753NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/100481364-6753
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...1432-2293NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/5501432-2293
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1613-7566NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/5061613-7566
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1862-2585NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/116141862-2585
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...1865-2085NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/121901865-2085
-

2758 rows × 27 columns

-
# Left join with df_issnl on 'issn' to bring in the issnl column;
# all nlch rows are kept.
nlch = nlch.merge(df_issnl, how='left', on='issn')
nlch
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...NaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...1074-07081074-0708
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...2075-13542070-0733
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...1748-50021748-4995
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...2040-47192040-4700
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...NaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...0261-34090261-3409
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/100481364-67531364-6745
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/5501432-22930943-3481
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/5061613-75660945-358X
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/116141862-25851011-0070
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/121901865-20851598-5865
-

2758 rows × 28 columns

-
# Check: rows for which the merge produced no issnl.
nlch[nlch['issnl'].isna()]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
37NaNfulltextNaN19592006NaNNaNNaNNaNNaN...NaNNaN1357-7298Animal scienceserialCambridge University Pressaschttp://www.cambridge.org/core/product/identifi...1748-748XNaN
52NaNfulltextNaN19572015NaNNaNNaNNaNNaN...NaNNaN2055-7973British Catholic HistoryserialCambridge University Pressbchhttp://www.cambridge.org/core/product/identifi...2055-7981NaN
76NaNfulltextNaN18822015NaNNaNNaNNaNNaN...NaNNaN1750-2705Cambridge Classical JournalserialCambridge University Pressccjhttp://www.cambridge.org/core/product/identifi...2047-993XNaN
110NaNfulltextNaN20112015NaNNaNNaNNaNNaN...NaNNaN2079-7362East Asian Journal on Applied MathematicsserialCambridge University Presseamhttp://www.cambridge.org/core/product/identifi...2079-7370NaN
152NaNfulltextNaN19802015NaNNaNNaNNaNNaN...NaNNaN2051-5367Hegel BulletinserialCambridge University Presshglhttp://www.cambridge.org/core/product/identifi...2051-5375NaN
194NaNfulltextNaN19912015NaNNaNNaNNaNNaN...NaNNaN2055-6365Journal of Psychologists and Counsellors in Sc...serialCambridge University Pressjgchttp://www.cambridge.org/core/product/identifi...2055-6373NaN
200NaNfulltextNaN19111993NaNNaNNaNNaNNaN...NaNNaN2049-9299Journal of the Staple Inn Actuarial SocietyserialCambridge University Pressjishttp://www.cambridge.org/core/product/identifi...2059-6162NaN
267NaNfulltextNaN20092015NaNNaNNaNNaNNaN...NaNNaN0016-7746Netherlands Journal of Geosciences / Geologie ...serialCambridge University Pressnjghttp://www.cambridge.org/core/product/identifi...1573-9708NaN
278NaNfulltextNaN20082015NaNNaNNaNNaNNaN...NaNNaNNaNAustralasian Journal of Organisational PsychologyserialCambridge University Pressorphttp://www.cambridge.org/core/product/identifi...2054-2232NaN
375NaNfulltextNaN17882015NaNNaNNaNNaNNaN...NaNNaN1755-6910Earth and environmental science transactions o...serialRoyal Society of Edinburgh Scotland Foundationtrehttp://www.cambridge.org/core/product/identifi...1755-6929NaN
405PfulltextNaN18552017NaNNaNNaNNaNNaN...NaNNaN0341-289XAnnalen des Historischen Vereins für den Niede...yearbookBöhlau Verlag2194-3818https://www.degruyter.com/openurl?genre=journa...2194-3818NaN
411PfulltextNaN19552017NaNNaNNaNNaNNaN...NaNNaN0066-6297Archiv für Diplomatik, Schriftgeschichte, Sieg...yearbookBöhlau Verlag2194-5020https://www.degruyter.com/openurl?genre=journa...2194-5020NaN
413PfulltextNaN19032017NaNNaNNaNNaNNaN...NaNNaN0003-9233Archiv für KulturgeschichteserialBöhlau Verlag2194-3958https://www.degruyter.com/openurl?genre=journa...2194-3958NaN
418PfulltextNaN18762017NaNNaNNaNNaNNaN...NaNNaN0003-9497Archivalische ZeitschriftserialBöhlau Verlag2194-3826https://www.degruyter.com/openurl?genre=journa...2194-3826NaN
427PfulltextNaN19482017NaNNaNNaNNaNNaN...NaNNaN0006-2456Bildung und ErziehungserialBöhlau Verlag2194-3834https://www.degruyter.com/openurl?genre=journa...2194-3834NaN
458PfulltextNaN18672017NaNNaNNaNNaNNaN...NaNNaN0070-444XDeutsches Dante-JahrbuchyearbookDe Gruyter2194-4059https://www.degruyter.com/openurl?genre=journa...2194-4059NaN
468PfulltextNaN19942017NaNNaNNaNNaNNaN...NaNNaN2566-9095Etruscan and Italic StudiesserialDe Gruyter2566-9109https://www.degruyter.com/openurl?genre=journa...2566-9109NaN
479PfulltextNaN20052017NaNNaNNaNNaNNaN...NaNNaN2567-4765FinanzRundschauserialVerlag Dr. Otto Schmidt2567-4897https://www.degruyter.com/openurl?genre=journa...2567-4897NaN
530PfulltextNaN19692017NaNNaNNaNNaNNaN...NaNNaN0074-9818Internationales Jahrbuch der ErwachsenenbildungyearbookBöhlau Verlag2194-3699https://www.degruyter.com/openurl?genre=journa...2194-3699NaN
537PfulltextNaN19122017NaNNaNNaNNaNNaN...NaNNaN0341-9320Jahrbuch des Kölnischen GeschichtsvereinsyearbookBöhlau Verlag2198-0675https://www.degruyter.com/openurl?genre=journa...2198-0675NaN
561PfulltextNaN20122017NaNNaNNaNNaNNaN...NaNNaN2194-6345Journal of Econometric MethodsserialDe Gruyter2156-6674https://www.degruyter.com/openurl?genre=journa...2156-6674NaN
570FfulltextNaN19772017NaNNaNNaNNaNNaN...NaNNaN2567-9430Journal of Laboratory MedicineserialDe Gruyter2567-9449https://www.degruyter.com/openurl?genre=journa...2567-9449NaN
675PfulltextNaN19502017NaNNaNNaNNaNNaN...NaNNaN0080-5319SaeculumserialBöhlau Verlag2194-4075https://www.degruyter.com/openurl?genre=journa...2194-4075NaN
708PfulltextNaN20052017NaNNaNNaNNaNNaN...NaNNaN2363-4774World Political ScienceserialDe Gruyter2363-4782https://www.degruyter.com/openurl?genre=journa...2363-4782NaN
709PfulltextNaN20142017NaNNaNNaNNaNNaN...NaNNaN2196-6249Yearbook for European Jewish Literature StudiesyearbookDe Gruyter2196-6257https://www.degruyter.com/openurl?genre=journa...2196-6257NaN
712PfulltextNaN18612017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
713PfulltextNaN19112017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
714PfulltextNaN18802017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
766NaNfulltextNaN20152018NaNNaNNaNNaNNaN...NaNNaN2041-2649Briefings in Functional GenomicsserialOxford University Pressbfgphttps://academic.oup.com/bfgp2041-2647NaN
890NaNfulltextNaN19222018NaNNaNNaNNaNNaN...NaNNaN0021-924XThe Journal of BiochemistryserialOxford University Pressjbchemhttps://academic.oup.com/jb-NaN
926NaNfulltextNaN18892018NaNNaNNaNNaNNaN...NaNNaN0024-2160The LibraryserialOxford University Presslibrajhttps://academic.oup.com/library-NaN
1010NaNfulltextNaN19771992NaNNaNNaNNaNNaN...NaNNaN0148-0847Social Work Research and AbstractsserialOxford University Pressswrahttps://academic.oup.com/swra1001-3412NaN
1057NaNfulltextNaN20172018NaNNaNNaNNaNNaN...NaNNaN0021-972XThe Journal of Clinical Endocrinology & Metabo...serialOxford University Pressjcemhttps://academic.oup.com/jcem1845-7197NaN
1074NaNfulltextNaN20182018NaNNaNNaNNaNNaN...NaNNaN2398-4910Perspectives on Public Management and GovernanceserialOxford University Pressppmghttps://academic.oup.com/ppmg2398-4929NaN
1094NaNfulltextNaN19762015NaNNaNNaNNaNNaN...NaNNaN2366-004XAbdominal RadiologySerialSpringer US261http://link.springer.com/journal/2612366-0058NaN
1105NaNfulltextNaN19821985NaNNaNNaNNaNNaN...NaNNaN0253-486XGeochemistrySerialScience Press11631http://link.springer.com/journal/11631NaN
1148NaNfulltextNaN19752004NaNNaNNaNNaNNaN...NaNNaN1066-2316American Journal of Criminal JusticeSerialSpringer US12103http://link.springer.com/journal/121031936-1351NaN
1218NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1862-3522Archives of OsteoporosisSerialSpringer London11657http://link.springer.com/journal/116571862-3514NaN
1363NaNfulltextNaN19952002NaNNaNNaNNaNNaN...NaNNaN1006-6497Chinese journal of integrated traditional and ...SerialSpringer Berlin Heidelberg11655http://link.springer.com/journal/11655NaN
1365NaNfulltextNaN20092015NaNNaNNaNNaNNaN...NaNNaN0256-7679Chinese Journal of Polymer ScienceSerialChinese Chemical Society and Institute of Chem...10118http://link.springer.com/journal/101181439-6203NaN
1382NaNfulltextNaN19831994NaNNaNNaNNaNNaN...NaNNaN0731-8235Clinical reviews in allergySerialSpringer US12016http://link.springer.com/journal/12016NaN
1383NaNfulltextNaN19822015NaNNaNNaNNaNNaN...NaNNaN0770-3198Clinical RheumatologySerialSpringer London10067http://link.springer.com/journal/100671434-9949NaN
1938NaNfulltextNaN20082015NaNNaNNaNNaNNaN...NaNNaN1936-1521Journal of Child & Adolescent TraumaSerialSpringer International Publishing40653http://link.springer.com/journal/406531936-153XNaN
2003NaNfulltextNaN19862015NaNNaNNaNNaNNaN...NaNNaN0884-8734Journal of General Internal MedicineSerialSpringer US11606http://link.springer.com/journal/116061525-1497NaN
2136NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1009-6124Journal of Systems Science and ComplexitySerialAcademy of Mathematics and Systems Science, Ch...11424http://link.springer.com/journal/114241559-7067NaN
2255NaNfulltextNaN19742015NaNNaNNaNNaNNaN...NaNNaN0095-3628Microbial EcologySerialSpringer US248http://link.springer.com/journal/2481432-184XNaN
2355NaNfulltextNaN19921995NaNNaNNaNNaNNaN...NaNNaN0941-2530Orthopedics and TraumatologySerialUrban & Vogel65http://link.springer.com/journal/651617-3838NaN
2674NaNfulltextNaN18831887NaNNaNNaNNaNNaN...NaNNaNNaNTransactions of the Academy of Medicine in Ire...SerialSpringer-Verlag12680http://link.springer.com/journal/12680NaNNaN
-

48 rows × 28 columns

-
- - - - -```python -# fall back to the ISSN on these rows (rows whose own issn is NaN stay NaN) -nlch.loc[nlch['issnl'].isna(), 'issnl'] = nlch['issn'] -``` - - -```python -# re-check: rows that still have no issnl -nlch.loc[nlch['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
712PfulltextNaN18612017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
713PfulltextNaN19112017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
714PfulltextNaN18802017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
2674NaNfulltextNaN18831887NaNNaNNaNNaNNaN...NaNNaNNaNTransactions of the Academy of Medicine in Ire...SerialSpringer-Verlag12680http://link.springer.com/journal/12680NaNNaN
-

4 rows × 28 columns

-
- - - - -```python -# columns taken from nlch for the merge: -# publication_title (plus the issnl key) -nlch_for_merge = nlch[['issnl', 'publication_title']] -nlch_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlpublication_title
01074-0708Journal of Agricultural and Applied Economics
12070-0733Advances in Applied Mathematics and Mechanics
21748-4995Annals of Actuarial Science
32040-4700Advances in Animal Biosciences
40261-3409Archaeologia
.........
27531364-6745neurogenetics
27540943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...
27550945-358XÖsterreichische Wasser- und Abfallwirtschaft
27561011-0070Österreichische Zeitschrift für Soziologie
27571598-5865Journal Applied Mathematics Computing
-

2758 rows × 2 columns

-
- - - - -```python -# rename publication_title to nlch_title -nlch_for_merge = nlch_for_merge.rename(columns={'publication_title' : 'nlch_title'}) -nlch_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlnlch_title
01074-0708Journal of Agricultural and Applied Economics
12070-0733Advances in Applied Mathematics and Mechanics
21748-4995Annals of Actuarial Science
32040-4700Advances in Animal Biosciences
40261-3409Archaeologia
.........
27531364-6745neurogenetics
27540943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...
27550945-358XÖsterreichische Wasser- und Abfallwirtschaft
27561011-0070Österreichische Zeitschrift für Soziologie
27571598-5865Journal Applied Mathematics Computing
-

2758 rows × 2 columns

-
- - - - -```python -# left-merge nlch_title into journals on issnl (every journals row is kept) -# NOTE(review): nlch_for_merge still holds rows whose issnl is NaN and is not de-duplicated; pandas matches NaN keys to each other, so confirm journals has no NaN issnl -journals = pd.merge(journals, nlch_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
............................................................
9939970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN
9949980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
9959991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields
99610000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN
99710010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN
-

998 rows × 19 columns

-
- - - - -```python -# flag presence in nlch: 0 when nlch_title is missing, 1 otherwise -journals.loc[journals['nlch_title'].isna(), 'nlch'] = 0 -journals.loc[~journals['nlch_title'].isna(), 'nlch'] = 1 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlch
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0NaN0.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
...............................................................
9939970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
9949980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
9959991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0
99610000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
99710010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
-

998 rows × 20 columns

-
- - - -### QOAM - - -```python -# load the QOAM scores file (tab-separated, UTF-8, header on the first row) -qoam = pd.read_csv('qoam/qoam_not_zero.tsv', encoding='utf-8', header=0, sep='\t') -qoam -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_score
02254-58835.0
12279-72545.0
22317-30765.0
32525-34685.0
41339-84745.0
.........
30182083-48101.0
30191759-22081.0
30200219-98741.0
30212083-61391.0
30222312-27571.0
-

3023 rows × 2 columns

-
- - - - -```python -# left-merge df_issnl onto qoam by issn; the result gains an issnl column -qoam = pd.merge(qoam, df_issnl, on='issn', how='left') -qoam -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_scoreissnl
02254-58835.02254-5883
12279-72545.02279-7254
22317-30765.02317-3076
32525-34685.02525-3468
41339-84745.01339-8474
............
30182083-48101.02083-4810
30191759-22081.01759-2208
30200219-98741.00219-9874
30212083-61391.02083-6139
30222312-27571.02312-2757
-

3023 rows × 3 columns

-
- - - - -```python -# check for rows left without a match by the merge (issnl is NaN) -qoam.loc[qoam['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_scoreissnl
242163-11824.50NaN
732292-13544.00NaN
772571-51354.00NaN
902201-568X4.00NaN
3021687-921X3.50NaN
4052391-54123.25NaN
4382668-05723.25NaN
8012391-54203.00NaN
8032391-54473.00NaN
8142391-54553.00NaN
8152391-54713.00NaN
11002516-31592.75NaN
12162289-56392.50NaN
12282211-38352.50NaN
15061658-35582.25NaN
15502214-62962.25NaN
19601687-52572.00NaN
19751687-56992.00NaN
21402056-33152.00NaN
21502083-36362.00NaN
21892366-00581.75NaN
21982450-69661.75NaN
22541308-69791.75NaN
22671035-76801.75NaN
23002411-96601.75NaN
26112198-26271.25NaN
28042180-27261.00NaN
29792146-05741.00NaN
-
- - - - -```python -# fall back to the ISSN on these rows -qoam.loc[qoam['issnl'].isna(), 'issnl'] = qoam['issn'] -``` - - -```python -# re-check: rows that still have no issnl -qoam.loc[qoam['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - -
issnqoam_av_scoreissnl
-
- - - - -```python -# columns taken from qoam for the merge: -# qoam_av_score (plus the issnl key) -qoam_for_merge = qoam[['issnl', 'qoam_av_score']] -qoam_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlqoam_av_score
02254-58835.0
12279-72545.0
22317-30765.0
32525-34685.0
41339-84745.0
.........
30182083-48101.0
30191759-22081.0
30200219-98741.0
30212083-61391.0
30222312-27571.0
-

3023 rows × 2 columns

-
- - - - -```python -# left-merge qoam_av_score into journals on issnl -# the right side can hold several rows per issnl, which duplicates journals rows (998 -> 1000 in the output below) -journals = pd.merge(journals, qoam_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_score
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1Yes1.0PLoS One1.0NaN0.0NaN0.04.035714
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
..................................................................
9959970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9969980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9979991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaN
99810000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
99910010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
-

1000 rows × 21 columns

-
- - - - -```python -# drop duplicate rows, keeping the first row for each id -journals = journals.drop_duplicates(subset=['id']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_score
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1Yes1.0PLoS One1.0NaN0.0NaN0.04.035714
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
..................................................................
9959970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9969980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9979991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaN
99810000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
99910010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
-

996 rows × 21 columns

-
- - - -## Finalisation de la table journals - - -```python -# look for duplicates: rows whose issnl already appeared earlier in journals, sorted by issnl -journals_doublons = journals[['issn', 'issnl', 'title']].loc[journals.duplicated(subset='issnl')].sort_values(by='issnl') -journals_doublons -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnltitle
921520-51260002-7863Journal of the American Chemical Society (Online)
3931520-68820003-2700Analytical chemistry (Online)
3101077-31180003-6951Applied physics letters (Online)
1671432-07460004-6361Astronomy & astrophysics (Online)
7931542-00860006-3495Biophysical journal (Online)
............
4262050-74962050-7496Journal of materials chemistry. A (Online)
9522050-75342050-7526Journal of materials chemistry. C (Online)
832469-99692469-9950Physical review. B. (Online)
2092470-00292470-0010Physical review. D. (Online)
8402470-00532470-0045Physical review. E (Online)
-

85 rows × 3 columns

-
- - - - -```python -journals_doublons = journals_doublons.loc[journals_doublons['issnl'].notna()] -``` - - -```python -# merge a flag back into journals to see every row that shares a duplicated issnl (first and repeated occurrences) -journals_doublons['doublon_issnl'] = 1 -journals = pd.merge(journals, journals_doublons[['issnl', 'doublon_issnl']], on='issnl', how='left') -journals.loc[journals['doublon_issnl'] == 1] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
560003-69510003-6951Applied physics letters19629999http://scitation.aip.org/aplo/Appl. phys. lett.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
671029-84791029-8479The journal of high energy physics (Online)19979999http://link.springer.com/journal/13130J. high energy phys. (Online)12483...No1.0Journal of High Energy Physics1.0preserved1.0NaN0.0NaN1.0
780002-78630002-7863Journal of the American Chemical Society (Print)18799999http://pubs.acs.org/journals/jacsat/index.htmlJ. Am. Chem. Soc. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
9449501520-52071520-5207The journal of physical chemistry. B (1997 : O...19979999http://pubs.acs.org/journals/jpcbfk/index.htmlJ. phys. chem., B (1997 : Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
9469521361-65280957-4484Nanotechnology (Bristol. Online)19909999http://www.iop.org/Journals/naNanotechnology (Bristol, Online)124234...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
9479531469-76450022-1120Journal of fluid mechanics (Online)19569999http://firstsearch.oclc.orgJ. fluid mech. (Online)124234...NaN0.0NaN0.0preserved1.0Journal of Fluid Mechanics1.0NaN1.0
9489542050-75342050-7526Journal of materials chemistry. C (Online)20139999http://pubs.rsc.org/en/journals/journalissues/tc#J. mater. chem. C (Online)124234...NaN0.0Journal of Materials Chemistry C: Materials fo...1.0preserved1.0NaN0.0NaN1.0
9749801477-09701352-4585Multiple sclerosis (Online)19959999http://www.arnoldpublishers.com/journals/pages...Mult. scler. (Online)124234...NaN0.0Multiple Sclerosis Journal1.0preserved1.0NaN0.01.751.0
-

170 rows × 22 columns

-
- - - - -```python -# same flagged rows, sorted by issnl so the rows sharing an issnl sit together -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
780002-78630002-7863Journal of the American Chemical Society (Print)18799999http://pubs.acs.org/journals/jacsat/index.htmlJ. Am. Chem. Soc. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
92931520-51260002-7863Journal of the American Chemical Society (Online)18799999http://books.google.com/books?id=ExsEZbIZKjwCJ. Am. Chem. Soc. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
3933961520-68820003-2700Analytical chemistry (Online)19479999http://pubs.acs.org/journals/ancham/about.htmlAnal. chem. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
69700003-27000003-2700Analytical chemistry (Washington)19489999http://pubs.acs.org/journals/ancham/index.htmlAnal. chem. (Wash.)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
560003-69510003-6951Applied physics letters19629999http://scitation.aip.org/aplo/Appl. phys. lett.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
40412469-99502469-9950Physical review. B20169999http://journals.aps.org/prbPhys. rev. B.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
79802470-00102470-0010Physical review. D20169999http://journals.aps.org/prdPhys. rev. D.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
2092102470-00292470-0010Physical review. D. (Online)20169999http://journals.aps.org/prdPhys. rev. D. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
5305332470-00452470-0045Physical review. E (Print)20169999http://journals.aps.org/prePhys. rev., E (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
8368422470-00532470-0045Physical review. E (Online)20169999http://journals.aps.org/prePhys. rev., E (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
-

170 rows × 22 columns

-
- - - - -```python -# export csv des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_csv('sample/journals_duplicates.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_excel('sample/journals_duplicates.xlsx', index=False) -``` - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['issnl']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
-

911 rows × 22 columns

-
- - - - -```python -# ajout du oa_status -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals['oa_status'] = 1 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN1
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - - -```python -# status 5 pour les revues DOAJ -journals.loc[journals['doaj_status'] == 1, 'oa_status'] = 5 -# status 6 pour les revues DOAJ avec APC = 0 -journals.loc[(journals['doaj_status'] == 1) & (journals['APC'] == 'No'), 'oa_status'] = 6 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN5
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - - -```python -journals['oa_status'].value_counts() -``` - - - - - 1 824 - 5 70 - 6 17 - Name: oa_status, dtype: int64 - - - - -```python -# export csv brut -journals.to_csv('sample/journals_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut -journals.to_excel('sample/journals_brut.xlsx', index=False) -``` - - -```python -# export csv des ids -journals[['id', 'title', 'issn', 'issnl']].to_csv('sample/journals_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des ids -journals[['id', 'title', 'issn', 'issnl']].to_excel('sample/journals_ids.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/03_oacct_journals.py b/import_scripts/03_oacct_journals.py deleted file mode 100644 index 3b63a7b5..00000000 --- a/import_scripts/03_oacct_journals.py +++ /dev/null @@ -1,1062 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Extraction des données des revues -# -# -# ## Corpus initial -# -# ISSNs des revues des publication archivées sur l'AoU UNIGE et sur Infoscience EPFL -# -# * Fichier des ISSNs de l'AoU exporté le 16.10.2020 -# * Fichier des ISSNs de Infoscience exporté le 28.01.2021 -# * Données extraits à partir du JSON de ISSN.org -# - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# paramètre pour le nombre de journaux dans le sample (0 pour prendre tout) -journals_sample_n = 1000 - - -# ## Table OA categories -# -# * 1 : UNKNOWN -# * 2 : Green -# * 3 : Hybrid -# * 4 : Full -# * 5 : Gold -# * 6 : Diamond - -# In[2]: - - -# creation du DF -col_names = ['id', - 'status', - 'description', - 'subscription', - 'accepted_manuscript', - 'apc', - 'final_version' - ] -oas = pd.DataFrame(columns = col_names) -oas - - -# In[3]: - - -# ajout des valeurs -oas = oas.append({'id' : 1, 'status' : 'UNKNOWN', 'description' : '', 'subscription' : 0, 'accepted_manuscript' : 0, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 2, 'status' : 'Green', 'description' : 'Paywalled access journal, usually allows the archive of submitted or accepted version on institutional repositories (embargo periods may apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 3, 'status' : 'hybrid', 'description' : 'Paywalled access journal, offers several Open Access upon payment of APCs. 
It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -# oas = oas.append({'id' : 4, 'status' : 'Full', 'description' : 'No subscription, Green or Gold', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 5, 'status' : 'Gold', 'description' : 'Open Access journal (payment of APCs may apply). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 6, 'status' : 'Diamond', 'description' : 'Open Access journal (without payment of APCs). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) - - -# In[4]: - - -oas - - -# In[5]: - - -# esport JSON -result = oas.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/oa.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[6]: - - -# export csv -oas.to_csv('sample/oa.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[7]: - - -# export excel -oas.to_excel('sample/oa.xlsx', index=False) - - -# ## Table Journals - -# In[8]: - - -issns = pd.read_csv('issn/issns_count.tsv', encoding='utf-8', header=0, sep='\t') -issns - - -# In[9]: - - -# ajout des colonnes -issns.insert(0, 'id', '', False) -issns - - -# In[10]: - - -# convertir l'index en id -issns = issns.reset_index() -issns - - -# In[11]: - - -# ajout de l'id avec l'index + 1 -issns['id'] = issns['index'] + 1 -del issns['index'] -issns - - -# In[12]: - - -# reduction à X journaux pour l'échantillon de test -if journals_sample_n 
> 0 : - issns = issns.loc[:journals_sample_n] -issns - - -# In[13]: - - -# ajout des ISSN-L -df_issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -df_issnl - - -# In[14]: - - -# renommer les colonnes -df_issnl = df_issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) - - -# In[15]: - - -issns = pd.merge(issns, df_issnl, on='issn', how='left') -issns - - -# In[16]: - - -# creation du DF -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'issn', - 'issnl', - 'title', - 'starting_year', - 'end_year', - 'url', - 'name_short_iso_4' - ] -journals = pd.DataFrame(columns = col_names) -journals - - -# In[17]: - - -# creation du DF -col_names = ['id', 'iso_code'] -journals_languages = pd.DataFrame(columns = col_names) -journals_languages - - -# In[18]: - - -# creation du DF -# 'oa_status' supprimé -col_names = ['id', 'iso_code'] -journals_countries = pd.DataFrame(columns = col_names) -journals_countries - - -# In[19]: - - -# extraction des informations à partir des données ISSN.org -for index, row in issns.iterrows(): - myid = row['id'] - myissn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - issnl = np.nan - title = '' - keytitle = '' - starting_year = np.nan - end_year = np.nan - myurl = np.nan - journal_country = np.nan - journal_language = np.nan - keytitle_abbr = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('mainTitle' in x): - title = x['mainTitle'] - else : - if ('name' in x): - title = x['name'] - # print(myissn) - if ('startDate' in x): - starting_year = x['startDate'] - if ('endDate' in x): - end_year = x['endDate'] - if ('url' in x): - urls = x['url'] - if type(urls) is list: - for url in urls: - # Filtrer les 
URLs des archives : - # www.ncbi.nlm.nih.gov/pmc/* - # www.pubmedcentral.gov/* - # pubmedcentral.nih.gov/* - # bibpurl.oclc.org/* - # www.jstor.org/* - # ieeexplore.ieee.org - # ovidsp.ovid.com - # et garder le premier des restants - myurl = url - if ('ncbi.nlm.nih.gov' not in url - and 'pubmedcentral' not in url - and 'bibpurl.oclc.org' not in url - and 'jstor.org' not in url - and 'ieeexplore.ieee.org' not in url - and 'ovidsp.ovid.com' not in url): - break - else : - myurl = x['url'] - if ('spatial' in x): - countries = x['spatial'] - if type(countries) is list: - for country in countries: - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in country): - journal_country = country[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - else : - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in countries): - journal_country = countries[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - # langue "inLanguage": "http://id.loc.gov/vocabulary/iso639-2/eng", - if ('inLanguage' in x): - languages = x['inLanguage'] - if type(languages) is list: - for language in languages: - journal_language = language[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - else : - journal_language = languages[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - if (x['@id'] == 'resource/ISSN/' + myissn + '#KeyTitle'): - if ('value' in x): - keytitle = x['value'] - if (x['@id'] == 'resource/ISSN/' + myissn + '#ISSN-L'): - if ('value' in x): - issnl = x['value'] - # "@id": "resource/ISSN/1098-0121#AbbreviatedKeyTitle", - if (x['@id'] == 'resource/ISSN/' + myissn + '#AbbreviatedKeyTitle'): - if ('value' in x): - mykeytitle_abbrs = x['value'] - if type(mykeytitle_abbrs) is list: - for mykeytitle_abbr in mykeytitle_abbrs: - 
print(myissn + ' - AbbreviatedKeyTitle is a list ' + mykeytitle_abbr) - keytitle_abbr = mykeytitle_abbr - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' AbbreviatedKeyTitle is a list ' + mykeytitle_abbr + '\n') - break - else : - keytitle_abbr = mykeytitle_abbrs - if keytitle != '' : - title = keytitle - if title != '' : - # supprimer le point à la fin - if (title[-1] == '.'): - title = title[0:-1] - # remplacer les caractères spéciaux ˜The œ - if type(title) is list: - for mytitlei in title: - print(myissn + ' - title is a list ' + mytitlei) - title = str.replace(mytitlei, '˜The œ', 'The ') - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' title is a list ' + mytitlei + '\n') - break - else : - title = str.replace(title, '˜The œ', 'The ') - else : - print(row['issn'] + ' - not found') - with open('sample/03_journals_issn_errors.txt', 'a', encoding='utf-8') as g: - g.write(row['issn'] + ' not found \n') - journals.at[index,'id'] = myid - journals.at[index,'title'] = title - journals.at[index,'issn'] = myissn - journals.at[index,'issnl'] = issnl - journals.at[index,'starting_year'] = starting_year - journals.at[index,'end_year'] = end_year - journals.at[index,'url'] = myurl - journals.at[index,'name_short_iso_4'] = keytitle_abbr - - -# In[20]: - - -journals - - -# In[21]: - - -# titres vides -journals.loc[journals['title'] == ''] - - -# In[22]: - - -# export csv des titres vides -journals.loc[journals['title'] == ''].to_csv('sample/journals_sans_titre.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[23]: - - -# export excel des ids -journals.loc[journals['title'] == ''].to_excel('sample/journals_sans_titre.xlsx', index=False) - - -# In[24]: - - -# garder les lignes avec titre -journals = journals.loc[journals['title'] != ''] -journals - - -# In[25]: - - -journals.shape[0] - - -# ## Languages - -# In[26]: - - -journals_languages - - -# 
In[27]: - - -# ouvrir la table des langues -languages = pd.read_csv('sample/language.tsv', encoding='utf-8', header=0, sep='\t') -languages - - -# In[28]: - - -# renommer les colonnes -del languages['name'] -languages = languages.rename(columns={'id' : 'language'}) - - -# In[29]: - - -# merge avec languages -journals_languages = pd.merge(journals_languages, languages, on='iso_code', how='left') -journals_languages - - -# In[30]: - - -# concat valeurs avec même id -journals_languages['language'] = journals_languages['language'].astype(str) -journals_languages = journals_languages.groupby('id').agg({'language': lambda x: ', '.join(x)}) -journals_languages - - -# In[31]: - - -# recuperation de l'id des langues -journals = pd.merge(journals, journals_languages, on='id', how='left') -journals - - -# ## Countries - -# In[32]: - - -journals_countries - - -# In[33]: - - -# ouvrir la table des pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country - - -# In[34]: - - -# renommer les colonnes -del country['name'] -country = country.rename(columns={'id' : 'country'}) - - -# In[35]: - - -# merge avec countries -journals_countries = pd.merge(journals_countries, country, on='iso_code', how='left') -journals_countries - - -# In[36]: - - -# concat valeurs avec même id -journals_countries['country'] = journals_countries['country'].astype(str) -journals_countries = journals_countries.groupby('id').agg({'country': lambda x: ', '.join(x)}) -journals_countries - - -# In[37]: - - -# recuperation de l'id des langues -journals = pd.merge(journals, journals_countries, on='id', how='left') -journals - - -# ### DOAJ - -# In[38]: - - -# ajout de DOAJ info -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj - - -# In[39]: - - -# ajout ISSNL -doaj['issn'] = doaj['Journal ISSN (print version)'] -doaj.loc[doaj['issn'].isna(), 'issn'] = doaj['Journal EISSN (online version)'] -doaj - - -# In[40]: - - -doaj = 
pd.merge(doaj, df_issnl, on='issn', how='left') -doaj - - -# In[41]: - - -doaj.columns - - -# In[42]: - - -doaj['Preservation Services'] - - -# In[43]: - - -doaj['DOAJ Seal'] - - -# In[44]: - - -doaj['issnl'] - - -# In[45]: - - -doaj['APC'].value_counts() - - -# In[46]: - - -# ajout des infos de DOAJ : -# Journal title -# DOAJ Seal -doaj_for_merge = doaj[['issnl', 'Journal title', 'DOAJ Seal', 'APC']] -doaj_for_merge - - -# In[47]: - - -# renommer les colonnes -doaj_for_merge = doaj_for_merge.rename(columns={'Journal title' : 'doaj_title', 'DOAJ Seal' : 'doaj_seal'}) -doaj_for_merge - - -# In[48]: - - -# merge avec journals -journals = pd.merge(journals, doaj_for_merge, on='issnl', how='left') -journals - - -# In[49]: - - -# ajouter info sur la presence sur DOAJ ou du seal -journals.loc[journals['doaj_title'].isna(), 'doaj_status'] = 0 -journals.loc[~journals['doaj_title'].isna(), 'doaj_status'] = 1 -journals.loc[journals['doaj_seal'] == 'Yes', 'doaj_seal'] = 1 -journals.loc[journals['doaj_seal'] == 'No', 'doaj_seal'] = 0 -journals - - -# ### LOCKSS - -# In[50]: - - -# ajout des infos de preservation LOCKSS, Portico et Licences Nationales -lockss = pd.read_csv('lockss/keepers-LOCKSS-report.csv', encoding='utf-8', header=0, skiprows=1) -lockss - - -# In[51]: - - -# ajout ISSNL -lockss['issn'] = lockss['eISSN'] -lockss.loc[lockss['eISSN'].isna(), 'issn'] = lockss['ISSN'] -lockss - - -# In[52]: - - -lockss = pd.merge(lockss, df_issnl, on='issn', how='left') -lockss - - -# In[53]: - - -lockss.columns - - -# In[54]: - - -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] - - -# In[55]: - - -# utiliser l'ISSN à la place sur ces lignes -lockss.loc[lockss['issnl'].isna(), 'issnl'] = lockss['issn'] - - -# In[56]: - - -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] - - -# In[57]: - - -# ajout des infos de LOCKSS : -# Title -lockss_for_merge = lockss[['issnl', 'Title']] -lockss_for_merge - - -# In[58]: - - -# renommer les colonnes 
-lockss_for_merge = lockss_for_merge.rename(columns={'Title' : 'lockss_title'}) -lockss_for_merge - - -# In[59]: - - -# merge avec journals -journals = pd.merge(journals, lockss_for_merge, on='issnl', how='left') -journals - - -# In[60]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# In[61]: - - -# ajouter info sur la presence sur LOCKSS -journals.loc[journals['lockss_title'].isna(), 'lockss'] = 0 -journals.loc[~journals['lockss_title'].isna(), 'lockss'] = 1 -journals - - -# ### Portico - -# In[62]: - - -# ajout des infos de preservation Portico -portico = pd.read_excel('portico/e-journals.xlsx', sheet_name='Details', skiprows=2) -portico - - -# In[63]: - - -# ajout ISSNL -portico['issn'] = portico['e-ISSN'] -portico.loc[portico['e-ISSN'].isna(), 'issn'] = portico['Print ISSN'] -portico - - -# In[64]: - - -portico = pd.merge(portico, df_issnl, on='issn', how='left') -portico - - -# In[65]: - - -portico.columns - - -# In[66]: - - -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] - - -# In[67]: - - -# utiliser l'ISSN à la place sur ces lignes -portico.loc[portico['issnl'].isna(), 'issnl'] = portico['issn'] - - -# In[68]: - - -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] - - -# In[69]: - - -# ajout des infos de Portico : -# Status -portico_for_merge = portico[['issnl', 'Status']] -portico_for_merge - - -# In[70]: - - -# garder les lignes "preserved" -portico_for_merge = portico_for_merge.loc[portico_for_merge['Status'] == 'preserved'] -portico_for_merge - - -# In[71]: - - -# renommer les colonnes -portico_for_merge = portico_for_merge.rename(columns={'Status' : 'portico_status'}) -portico_for_merge - - -# In[72]: - - -# merge avec journals -journals = pd.merge(journals, portico_for_merge, on='issnl', how='left') -journals - - -# In[73]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# In[74]: - - -# ajouter info sur la 
presence sur portico -journals.loc[journals['portico_status'].isna(), 'portico'] = 0 -journals.loc[~journals['portico_status'].isna(), 'portico'] = 1 -journals - - -# ### Licences Nationales - -# In[75]: - - -# ajout des infos de preservation des Licences nationales -nlch1 = pd.read_excel('licences_nationales/cambridge_Switzerland_NationalLicences_2020-08-17.xlsx') -nlch1 - - -# In[76]: - - -# ajout des infos de preservation des Licences nationales -nlch2 = pd.read_excel('licences_nationales/gruyter_Switzerland_NationalLicences_2020-11-30.xlsx') -nlch2 - - -# In[77]: - - -# ajout des infos de preservation des Licences nationales -nlch3 = pd.read_excel('licences_nationales/oxford_Switzerland_NationalLicences_2020-09-24.xlsx') -nlch3 - - -# In[78]: - - -# ajout des infos de preservation des Licences nationales -nlch4 = pd.read_excel('licences_nationales/springer_Switzerland_NationalLicences_2020-08-12.xlsx') -nlch4 - - -# In[79]: - - -# concatener les 4 -nlch = pd.concat([nlch1, nlch2, nlch3, nlch4], ignore_index=True) -nlch - - -# In[80]: - - -nlch.columns - - -# In[81]: - - -# ajout ISSNL -nlch['issn'] = nlch['online_identifier'] -nlch.loc[nlch['online_identifier'].isna(), 'issn'] = nlch['print_identifier'] -nlch - - -# In[82]: - - -nlch = pd.merge(nlch, df_issnl, on='issn', how='left') -nlch - - -# In[83]: - - -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] - - -# In[84]: - - -# utiliser l'ISSN à la place sur ces lignes -nlch.loc[nlch['issnl'].isna(), 'issnl'] = nlch['issn'] - - -# In[85]: - - -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] - - -# In[86]: - - -# ajout des infos de nlch : -# publication_title -nlch_for_merge = nlch[['issnl', 'publication_title']] -nlch_for_merge - - -# In[87]: - - -# renommer les colonnes -nlch_for_merge = nlch_for_merge.rename(columns={'publication_title' : 'nlch_title'}) -nlch_for_merge - - -# In[88]: - - -# merge avec journals -journals = pd.merge(journals, nlch_for_merge, on='issnl', how='left') 
-journals - - -# In[89]: - - -# ajouter info sur la presence sur portico -journals.loc[journals['nlch_title'].isna(), 'nlch'] = 0 -journals.loc[~journals['nlch_title'].isna(), 'nlch'] = 1 -journals - - -# ### QOAM - -# In[90]: - - -# ouverture du fichier -qoam = pd.read_csv('qoam/qoam_not_zero.tsv', encoding='utf-8', header=0, sep='\t') -qoam - - -# In[91]: - - -qoam = pd.merge(qoam, df_issnl, on='issn', how='left') -qoam - - -# In[92]: - - -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] - - -# In[93]: - - -# utiliser l'ISSN à la place sur ces lignes -qoam.loc[qoam['issnl'].isna(), 'issnl'] = qoam['issn'] - - -# In[94]: - - -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] - - -# In[95]: - - -# ajout des infos de qoam : -# publication_title -qoam_for_merge = qoam[['issnl', 'qoam_av_score']] -qoam_for_merge - - -# In[96]: - - -# merge avec journals -journals = pd.merge(journals, qoam_for_merge, on='issnl', how='left') -journals - - -# In[97]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# ## Finalisation de la table journals - -# In[98]: - - -# test des doublons -journals_doublons = journals[['issn', 'issnl', 'title']].loc[journals.duplicated(subset='issnl')].sort_values(by='issnl') -journals_doublons - - -# In[99]: - - -journals_doublons = journals_doublons.loc[journals_doublons['issnl'].notna()] - - -# In[100]: - - -# merge pour voir les lignes avec doublon -journals_doublons['doublon_issnl'] = 1 -journals = pd.merge(journals, journals_doublons[['issnl', 'doublon_issnl']], on='issnl', how='left') -journals.loc[journals['doublon_issnl'] == 1] - - -# In[101]: - - -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl') - - -# In[102]: - - -# export csv des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_csv('sample/journals_duplicates.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[103]: - - -# export excel des doublons 
-journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_excel('sample/journals_duplicates.xlsx', index=False) - - -# In[104]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['issnl']) -journals - - -# In[105]: - - -# ajout du oa_status -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals['oa_status'] = 1 -journals - - -# In[106]: - - -# status 5 pour les revues DOAJ -journals.loc[journals['doaj_status'] == 1, 'oa_status'] = 5 -# status 6 pour les revues DOAJ avec APC = 0 -journals.loc[(journals['doaj_status'] == 1) & (journals['APC'] == 'No'), 'oa_status'] = 6 -journals - - -# In[107]: - - -journals['oa_status'].value_counts() - - -# In[108]: - - -# export csv brut -journals.to_csv('sample/journals_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[109]: - - -# export excel brut -journals.to_excel('sample/journals_brut.xlsx', index=False) - - -# In[110]: - - -# export csv des ids -journals[['id', 'title', 'issn', 'issnl']].to_csv('sample/journals_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[111]: - - -# export excel des ids -journals[['id', 'title', 'issn', 'issnl']].to_excel('sample/journals_ids.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/04_oacct_publishers.md b/import_scripts/04_oacct_publishers.md deleted file mode 100644 index c855a57c..00000000 --- a/import_scripts/04_oacct_publishers.md +++ /dev/null @@ -1,2826 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Extraction des données des éditeurs - -Sources : -1. Données de ISSN.org (JSON) - -### Format des données source - -* Noeud : "@graph" -* spatial & publisher : - * "@id": "resource/ISSN/0140-6736", - * "spatial": [ - "http://id.loc.gov/vocabulary/countries/ne", - "https://www.iso.org/obp/ui/#iso:code:3166:NL" - ], - -Exemple avec plusieurs éditeurs dans le temps : - - "publisher": [ - "resource/ISSN/0140-6736#Publisher-Elsevier", - "resource/ISSN/0140-6736#Publisher-J._Onwhyn" - ], - - { - "@id": "resource/ISSN/0140-6736#LatestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": "resource/ISSN/0140-6736#Publisher-Elsevier", - "location": "resource/ISSN/0140-6736#PublicationPlace-Amsterdam" - }, - - { - "@id": "resource/ISSN/0140-6736#Publisher-Elsevier", - "@type": "http://schema.org/Organization", - "name": "Elsevier" - }, - -Exemple avec un seul éditeur dans le temps : - - "publisher": "resource/ISSN/0899-8418#Publisher-Wiley", - - { - "@id": "resource/ISSN/0899-8418#EarliestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": "resource/ISSN/0899-8418#Publisher-Wiley", - "temporal": "c1989-", - "location": [ - "resource/ISSN/0899-8418#PublicationPlace-New_York", - "resource/ISSN/0899-8418#PublicationPlace-Chichester" - ] - }, - - { - "@id": "resource/ISSN/0899-8418#Publisher-Wiley", - "@type": "http://schema.org/Organization", - "name": "Wiley" - }, - -Exemple avec une liste d'éditeurs finaux : - - { - "@id": "resource/ISSN/2174-8454#LatestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": [ - "resource/ISSN/2174-8454#Publisher-The_Global_Studies_Institute_de_l’Université_de_Genève", - "resource/ISSN/2174-8454#Publisher-Universitat_de_València,_Departamento_de_Teoría_de_los_Lenguajes_y_Ciencias_de_la_Comunicación" - ], - "location": 
"resource/ISSN/2174-8454#PublicationPlace-Valencia" - }, - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -``` - -## Table Publishers - - -```python -# creation du DF -# 'country' supprimé pour l'ajouter aux journaux -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'name', - 'publisher_id_issn', - ] -publisher_issn = pd.DataFrame(columns = col_names) -publisher_issn -``` - - - - -
- - - - - - - - - - - - -
idnamepublisher_id_issn
-
- - - -## Table Journals - - -```python -journal = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN5
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - -## Table Journals Publishers - - -```python -# creation du DF -col_names = ['journal', - 'publisher_id_issn' - ] -journal_publisher = pd.DataFrame(columns = col_names) -journal_publisher -``` - - - - -
- - - - - - - - - - - -
journalpublisher_id_issn
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in journal.iterrows(): - journal_id = row['id'] - journal_issn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - publisher_name = '' - publisher_country = '' - publisher_id = '' - publisher_id_first = '' - publisher_id_last = '' - # export en json - if os.path.exists('issn/data/' + journal_issn + '.json'): - with open('issn/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + journal_issn + '#LatestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_last = x['publishedBy'] - elif (x['@id'] == 'resource/ISSN/' + journal_issn + '#EarliestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_first = x['publishedBy'] - if (publisher_id_last != ''): - publisher_id = publisher_id_last - else : - publisher_id = publisher_id_first - if type(publisher_id) is list: - for pid in publisher_id: - if (pid != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == pid): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : pid, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : pid}, ignore_index=True) - else : - if (publisher_id != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == publisher_id): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : publisher_id, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : publisher_id}, ignore_index=True) - else : - print(row['issn'] + ' - pas trouvé') -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 
130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - - - -```python -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamepublisher_id_issn
0NaNRevue Médicale Suisseresource/ISSN/1660-9379#Publisher-Revue_Médica...
1NaNAmerican Physical Societyresource/ISSN/0031-9007#Publisher-American_Phy...
2NaNPublic Library of Scienceresource/ISSN/1932-6203#Publisher-Public_Libra...
3NaNThe Global Studies Institute de l’Université d...resource/ISSN/2174-8454#Publisher-The_Global_S...
4NaNUniversitat de València, Departamento de Teorí...resource/ISSN/2174-8454#Publisher-Universitat_...
............
940NaNIOP Publishingresource/ISSN/0964-1726#Publisher-IOP_Publishing
941NaNElsevier [etc.]resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]
942NaNSpringerresource/ISSN/1432-2064#Publisher-Springer
943NaNPergamonresource/ISSN/0960-1481#Publisher-Pergamon
944NaNAmerican Physiological Societyresource/ISSN/0161-7567#Publisher-American_Phy...
-

945 rows × 3 columns

-
- - - - -```python -# simlification des IDs -publisher_issn[['publisher_id_racine', 'publisher_id_fin']] = publisher_issn['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamepublisher_id_issnpublisher_id_racinepublisher_id_fin
0NaNRevue Médicale Suisseresource/ISSN/1660-9379#Publisher-Revue_Médica...resource/ISSN/1660-9379Revue_Médicale_Suisse
1NaNAmerican Physical Societyresource/ISSN/0031-9007#Publisher-American_Phy...resource/ISSN/0031-9007American_Physical_Society
2NaNPublic Library of Scienceresource/ISSN/1932-6203#Publisher-Public_Libra...resource/ISSN/1932-6203Public_Library_of_Science
3NaNThe Global Studies Institute de l’Université d...resource/ISSN/2174-8454#Publisher-The_Global_S...resource/ISSN/2174-8454The_Global_Studies_Institute_de_l’Université_d...
4NaNUniversitat de València, Departamento de Teorí...resource/ISSN/2174-8454#Publisher-Universitat_...resource/ISSN/2174-8454Universitat_de_València,_Departamento_de_Teorí...
..................
940NaNIOP Publishingresource/ISSN/0964-1726#Publisher-IOP_Publishingresource/ISSN/0964-1726IOP_Publishing
941NaNElsevier [etc.]resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]resource/ISSN/0022-3468Elsevier_[etc.]
942NaNSpringerresource/ISSN/1432-2064#Publisher-Springerresource/ISSN/1432-2064Springer
943NaNPergamonresource/ISSN/0960-1481#Publisher-Pergamonresource/ISSN/0960-1481Pergamon
944NaNAmerican Physiological Societyresource/ISSN/0161-7567#Publisher-American_Phy...resource/ISSN/0161-7567American_Physiological_Society
-

945 rows × 5 columns

-
- - - - -```python -# simplifications -del publisher_issn['publisher_id_issn'] -del publisher_issn['publisher_id_racine'] -del publisher_issn['id'] -publisher_issn = publisher_issn.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namepublisher_id_issn
0Revue Médicale SuisseRevue_Médicale_Suisse
1American Physical SocietyAmerican_Physical_Society
2Public Library of SciencePublic_Library_of_Science
3The Global Studies Institute de l’Université d...The_Global_Studies_Institute_de_l’Université_d...
4Universitat de València, Departamento de Teorí...Universitat_de_València,_Departamento_de_Teorí...
.........
940IOP PublishingIOP_Publishing
941Elsevier [etc.]Elsevier_[etc.]
942SpringerSpringer
943PergamonPergamon
944American Physiological SocietyAmerican_Physiological_Society
-

945 rows × 2 columns

-
- - - - -```python -# supprimer les crochets et supprimer les doublons -# publisher['publisher_id'] = publisher['publisher_id'].str.replace('[', '') -# publisher['publisher_id'] = publisher['publisher_id'].str.replace(']', '') -# publisher['name'] = publisher['name'].str.replace('[', '') -# publisher['name'] = publisher['name'].str.replace(']', '') -publisher_issn = publisher_issn.drop_duplicates(subset=['publisher_id_issn']) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namepublisher_id_issn
0Revue Médicale SuisseRevue_Médicale_Suisse
1American Physical SocietyAmerican_Physical_Society
2Public Library of SciencePublic_Library_of_Science
3The Global Studies Institute de l’Université d...The_Global_Studies_Institute_de_l’Université_d...
4Universitat de València, Departamento de Teorí...Universitat_de_València,_Departamento_de_Teorí...
.........
929FisherFisher
930Tipografia La CommercialeTipografia_La_Commerciale
932Red.: Prof. Dr. F. Cavalli, Istituto oncologic...Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...
934Excerpta MedicaExcerpta_Medica
937Generative Grammar Group of the Department of ...Generative_Grammar_Group_of_the_Department_of_...
-

380 rows × 2 columns

-
- - - - -```python -# test publishers sans nom -publisher_issn.loc[publisher_issn['name'] == ''] -``` - - - - -
- - - - - - - - - - - - - - - - -
namepublisher_id_issn
241None
-
- - - - -```python -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issn
01resource/ISSN/1660-9379#Publisher-Revue_Médica...
12resource/ISSN/0031-9007#Publisher-American_Phy...
23resource/ISSN/1932-6203#Publisher-Public_Libra...
34resource/ISSN/2174-8454#Publisher-The_Global_S...
44resource/ISSN/2174-8454#Publisher-Universitat_...
.........
940997resource/ISSN/0964-1726#Publisher-IOP_Publishing
941998resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]
942999resource/ISSN/1432-2064#Publisher-Springer
9431000resource/ISSN/0960-1481#Publisher-Pergamon
9441001resource/ISSN/0161-7567#Publisher-American_Phy...
-

945 rows × 2 columns

-
- - - - -```python -# simlification des IDs -journal_publisher[['publisher_id_racine', 'publisher_id_fin']] = journal_publisher['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -# simplifications -del journal_publisher['publisher_id_issn'] -del journal_publisher['publisher_id_racine'] -journal_publisher = journal_publisher.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issn
01Revue_Médicale_Suisse
12American_Physical_Society
23Public_Library_of_Science
34The_Global_Studies_Institute_de_l’Université_d...
44Universitat_de_València,_Departamento_de_Teorí...
.........
940997IOP_Publishing
941998Elsevier_[etc.]
942999Springer
9431000Pergamon
9441001American_Physiological_Society
-

945 rows × 2 columns

-
- - - - -```python -# merge avec journals -journal_publisher = pd.merge(journal_publisher, publisher_issn, on='publisher_id_issn', how='left') -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issnname
01Revue_Médicale_SuisseRevue Médicale Suisse
12American_Physical_SocietyAmerican Physical Society
23Public_Library_of_SciencePublic Library of Science
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
............
940997IOP_PublishingIOP Publishing
941998Elsevier_[etc.]Elsevier [etc.]
942999SpringerSpringer
9431000PergamonPergamon
9441001American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 3 columns

-
- - - - -```python -journal_publisher = journal_publisher.rename(columns={'publisher_id_issn': 'publisher_id'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idname
01Revue_Médicale_SuisseRevue Médicale Suisse
12American_Physical_SocietyAmerican Physical Society
23Public_Library_of_SciencePublic Library of Science
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
............
940997IOP_PublishingIOP Publishing
941998Elsevier_[etc.]Elsevier [etc.]
942999SpringerSpringer
9431000PergamonPergamon
9441001American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 3 columns

-
- - - - -```python -publisher = journal_publisher[['publisher_id', 'name']] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname
0Revue_Médicale_SuisseRevue Médicale Suisse
1American_Physical_SocietyAmerican Physical Society
2Public_Library_of_SciencePublic Library of Science
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
.........
940IOP_PublishingIOP Publishing
941Elsevier_[etc.]Elsevier [etc.]
942SpringerSpringer
943PergamonPergamon
944American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 2 columns

-
- - - - -```python -# supprimer les doublons -publisher = publisher.drop_duplicates(subset='publisher_id') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname
0Revue_Médicale_SuisseRevue Médicale Suisse
1American_Physical_SocietyAmerican Physical Society
2Public_Library_of_SciencePublic Library of Science
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
.........
929FisherFisher
930Tipografia_La_CommercialeTipografia La Commerciale
932Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
934Excerpta_MedicaExcerpta Medica
937Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...
-

380 rows × 2 columns

-
- - - - -```python -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
375FisherFisher930
376Tipografia_La_CommercialeTipografia La Commerciale931
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...933
378Excerpta_MedicaExcerpta Medica935
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...938
-

380 rows × 3 columns

-
- - - - -```python -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
375FisherFisher376
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
-

380 rows × 3 columns

-
- - - - -```python -# ajout de la valeur UNKNOWN -# 'country': 999999 -publisher = publisher.append({'id' : 999999, 'name' : 'UNKNOWN', 'publisher_id': '999999'}, ignore_index=True) -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
380999999UNKNOWN999999
-

381 rows × 3 columns

-
- - - - -```python -# recuperation de l'id du publisher -journal_publisher = pd.merge(journal_publisher, publisher[['publisher_id', 'id']], on='publisher_id', how='left') -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnameid
01Revue_Médicale_SuisseRevue Médicale Suisse1
12American_Physical_SocietyAmerican Physical Society2
23Public_Library_of_SciencePublic Library of Science3
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
...............
940997IOP_PublishingIOP Publishing47
941998Elsevier_[etc.]Elsevier [etc.]75
942999SpringerSpringer8
9431000PergamonPergamon119
9441001American_Physiological_SocietyAmerican Physiological Society217
-

945 rows × 4 columns

-
- - - - -```python -journal_publisher = journal_publisher.rename(columns={'id': 'publisher'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamepublisher
01Revue_Médicale_SuisseRevue Médicale Suisse1
12American_Physical_SocietyAmerican Physical Society2
23Public_Library_of_SciencePublic Library of Science3
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
...............
940997IOP_PublishingIOP Publishing47
941998Elsevier_[etc.]Elsevier [etc.]75
942999SpringerSpringer8
9431000PergamonPergamon119
9441001American_Physiological_SocietyAmerican Physiological Society217
-

945 rows × 4 columns

-
- - - - -```python -# ajout du publisher id au journals_brut -journal_publisher_ids = journal_publisher[['journal', 'publisher']] -journal_publisher_ids = journal_publisher_ids.rename(columns={'journal': 'id'}) -journal_publisher_ids['publisher'] = journal_publisher_ids['publisher'].astype(str) -journal_publisher_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher
011
122
233
344
445
.........
94099747
94199875
9429998
9431000119
9441001217
-

945 rows × 2 columns

-
- - - - -```python -# concat valeurs avec même id -journal_publisher_grouped = journal_publisher_ids.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -journal_publisher_grouped -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher
id
11
22
33
44, 5
56
......
99747
99875
9998
1000119
1001217
-

911 rows × 1 columns

-
- - - - -```python -# recuperation de l'id du publisher -journals = pd.merge(journal, journal_publisher_grouped, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...lockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublisher
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaN0.0NaN0.0NaN0.0NaNNaN11
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0preserved1.0NaN0.0NaN1.012
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...PLoS One1.0NaN0.0NaN0.04.035714NaN53
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaN0.0NaN0.0NaN0.0NaNNaN14, 5
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0preserved1.0NaN0.0NaN1.016
..................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaN0.0preserved1.0NaN0.0NaNNaN147
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaN0.0preserved1.0NaN0.0NaNNaN175
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaN0.0preserved1.0NaN0.0NaNNaN1119
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaN0.0NaN0.0NaN0.0NaNNaN1217
-

911 rows × 24 columns

-
- - - - -```python -# export csv -publisher.to_csv('sample/publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -publisher.to_excel('sample/publishers_brut.xlsx', index=False) -``` - - -```python -# export csv brut des journals -journals.to_csv('sample/journals_publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut -journals.to_excel('sample/journals_publishers_brut.xlsx', index=False) -``` - - -```python -# export csv brut des ids -journal_publisher_ids.to_csv('sample/journals_publishers_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut des ids -journal_publisher_ids.to_excel('sample/journals_publishers_ids.xlsx', index=False) -``` diff --git a/import_scripts/04_oacct_publishers.py b/import_scripts/04_oacct_publishers.py deleted file mode 100644 index d18e1e59..00000000 --- a/import_scripts/04_oacct_publishers.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. -# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Extraction des données des éditeurs -# -# Sources : -# 1. 
Données de ISSN.org (JSON) -# -# ### Format des données source -# -# * Noeud : "@graph" -# * spatial & publisher : -# * "@id": "resource/ISSN/0140-6736", -# * "spatial": [ -# "http://id.loc.gov/vocabulary/countries/ne", -# "https://www.iso.org/obp/ui/#iso:code:3166:NL" -# ], -# -# Exemple avec plusieurs éditeurs dans le temps : -# -# "publisher": [ -# "resource/ISSN/0140-6736#Publisher-Elsevier", -# "resource/ISSN/0140-6736#Publisher-J._Onwhyn" -# ], -# -# { -# "@id": "resource/ISSN/0140-6736#LatestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": "resource/ISSN/0140-6736#Publisher-Elsevier", -# "location": "resource/ISSN/0140-6736#PublicationPlace-Amsterdam" -# }, -# -# { -# "@id": "resource/ISSN/0140-6736#Publisher-Elsevier", -# "@type": "http://schema.org/Organization", -# "name": "Elsevier" -# }, -# -# Exemple avec un seul éditeur dans le temps : -# -# "publisher": "resource/ISSN/0899-8418#Publisher-Wiley", -# -# { -# "@id": "resource/ISSN/0899-8418#EarliestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": "resource/ISSN/0899-8418#Publisher-Wiley", -# "temporal": "c1989-", -# "location": [ -# "resource/ISSN/0899-8418#PublicationPlace-New_York", -# "resource/ISSN/0899-8418#PublicationPlace-Chichester" -# ] -# }, -# -# { -# "@id": "resource/ISSN/0899-8418#Publisher-Wiley", -# "@type": "http://schema.org/Organization", -# "name": "Wiley" -# }, -# -# Exemple avec une liste d'éditeurs finaux : -# -# { -# "@id": "resource/ISSN/2174-8454#LatestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": [ -# "resource/ISSN/2174-8454#Publisher-The_Global_Studies_Institute_de_l’Université_de_Genève", -# "resource/ISSN/2174-8454#Publisher-Universitat_de_València,_Departamento_de_Teoría_de_los_Lenguajes_y_Ciencias_de_la_Comunicación" -# ], -# "location": "resource/ISSN/2174-8454#PublicationPlace-Valencia" -# }, - -# In[1]: - - -import pandas as pd -import csv -import json 
-import numpy as np -import os - - -# ## Table Publishers - -# In[2]: - - -# creation du DF -# 'country' supprimé pour l'ajouter aux journaux -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'name', - 'publisher_id_issn', - ] -publisher_issn = pd.DataFrame(columns = col_names) -publisher_issn - - -# ## Table Journals - -# In[3]: - - -journal = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# ## Table Journals Publishers - -# In[4]: - - -# creation du DF -col_names = ['journal', - 'publisher_id_issn' - ] -journal_publisher = pd.DataFrame(columns = col_names) -journal_publisher - - -# In[5]: - - -# extraction des informations à partir des données ISSN.org -for index, row in journal.iterrows(): - journal_id = row['id'] - journal_issn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - publisher_name = '' - publisher_country = '' - publisher_id = '' - publisher_id_first = '' - publisher_id_last = '' - # export en json - if os.path.exists('issn/data/' + journal_issn + '.json'): - with open('issn/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + journal_issn + '#LatestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_last = x['publishedBy'] - elif (x['@id'] == 'resource/ISSN/' + journal_issn + '#EarliestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_first = x['publishedBy'] - if (publisher_id_last != ''): - publisher_id = publisher_id_last - else : - publisher_id = publisher_id_first - if type(publisher_id) is list: - for pid in publisher_id: - if (pid != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == pid): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : pid, 'name' : publisher_name}, ignore_index=True) - journal_publisher 
= journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : pid}, ignore_index=True) - else : - if (publisher_id != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == publisher_id): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : publisher_id, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : publisher_id}, ignore_index=True) - else : - print(row['issn'] + ' - pas trouvé') - - -# In[6]: - - -publisher_issn - - -# In[7]: - - -# simlification des IDs -publisher_issn[['publisher_id_racine', 'publisher_id_fin']] = publisher_issn['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -publisher_issn - - -# In[8]: - - -# simplifications -del publisher_issn['publisher_id_issn'] -del publisher_issn['publisher_id_racine'] -del publisher_issn['id'] -publisher_issn = publisher_issn.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -publisher_issn - - -# In[9]: - - -# supprimer les crochets et supprimer les doublons -# publisher['publisher_id'] = publisher['publisher_id'].str.replace('[', '') -# publisher['publisher_id'] = publisher['publisher_id'].str.replace(']', '') -# publisher['name'] = publisher['name'].str.replace('[', '') -# publisher['name'] = publisher['name'].str.replace(']', '') -publisher_issn = publisher_issn.drop_duplicates(subset=['publisher_id_issn']) -publisher_issn - - -# In[10]: - - -# test publishers sans nom -publisher_issn.loc[publisher_issn['name'] == ''] - - -# In[11]: - - -journal_publisher - - -# In[12]: - - -# simlification des IDs -journal_publisher[['publisher_id_racine', 'publisher_id_fin']] = journal_publisher['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -# simplifications -del journal_publisher['publisher_id_issn'] -del journal_publisher['publisher_id_racine'] -journal_publisher = 
journal_publisher.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -journal_publisher - - -# In[13]: - - -# merge avec journals -journal_publisher = pd.merge(journal_publisher, publisher_issn, on='publisher_id_issn', how='left') -journal_publisher - - -# In[14]: - - -journal_publisher = journal_publisher.rename(columns={'publisher_id_issn': 'publisher_id'}) -journal_publisher - - -# In[15]: - - -publisher = journal_publisher[['publisher_id', 'name']] -publisher - - -# In[16]: - - -# supprimer les doublons -publisher = publisher.drop_duplicates(subset='publisher_id') -publisher - - -# In[17]: - - -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher - - -# In[18]: - - -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher - - -# In[19]: - - -# ajout de la valeur UNKNOWN -# 'country': 999999 -publisher = publisher.append({'id' : 999999, 'name' : 'UNKNOWN', 'publisher_id': '999999'}, ignore_index=True) -publisher - - -# In[20]: - - -# recuperation de l'id du publisher -journal_publisher = pd.merge(journal_publisher, publisher[['publisher_id', 'id']], on='publisher_id', how='left') -journal_publisher - - -# In[21]: - - -journal_publisher = journal_publisher.rename(columns={'id': 'publisher'}) -journal_publisher - - -# In[22]: - - -# ajout du publisher id au journals_brut -journal_publisher_ids = journal_publisher[['journal', 'publisher']] -journal_publisher_ids = journal_publisher_ids.rename(columns={'journal': 'id'}) -journal_publisher_ids['publisher'] = journal_publisher_ids['publisher'].astype(str) -journal_publisher_ids - - -# In[23]: - - -# concat valeurs avec même id -journal_publisher_grouped = journal_publisher_ids.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -journal_publisher_grouped - - -# In[24]: - - 
-# recuperation de l'id du publisher -journals = pd.merge(journal, journal_publisher_grouped, on='id', how='left') -journals - - -# In[25]: - - -# export csv -publisher.to_csv('sample/publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[26]: - - -# export excel -publisher.to_excel('sample/publishers_brut.xlsx', index=False) - - -# In[27]: - - -# export csv brut des journals -journals.to_csv('sample/journals_publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[28]: - - -# export excel brut -journals.to_excel('sample/journals_publishers_brut.xlsx', index=False) - - -# In[29]: - - -# export csv brut des ids -journal_publisher_ids.to_csv('sample/journals_publishers_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[30]: - - -# export excel brut des ids -journal_publisher_ids.to_excel('sample/journals_publishers_ids.xlsx', index=False) - diff --git a/import_scripts/05_oacct_issns.md b/import_scripts/05_oacct_issns.md deleted file mode 100644 index 39a18fd7..00000000 --- a/import_scripts/05_oacct_issns.md +++ /dev/null @@ -1,2109 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table ISSNs - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -``` - - -```python -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnl
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -journals = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', sep='\t', usecols=(['id', 'issn', 'issnl'])) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnl
011660-93791660-9379
120031-90070031-9007
231932-62031932-6203
342174-84542174-8454
451098-01211098-0121
............
9069970964-17260964-1726
9079980022-34680022-3468
9089991432-20640178-8051
90910000960-14810960-1481
91010010161-75670161-7567
-

911 rows × 3 columns

-
- - - - -```python -# renomer les colonnes id -journals = journals.rename(columns = {'id' : 'journal'}) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnissnl
011660-93791660-9379
120031-90070031-9007
231932-62031932-6203
342174-84542174-8454
451098-01211098-0121
............
9069970964-17260964-1726
9079980022-34680022-3468
9089991432-20640178-8051
90910000960-14810960-1481
91010010161-75670161-7567
-

911 rows × 3 columns

-
- - - - -```python -# test journals sans issn -journals.loc[journals['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - -
journalissnissnl
-
- - - - -```python -journals.loc[journals['journal'] == 5] -``` - - - - -
- - - - - - - - - - - - - - - - - - -
journalissnissnl
451098-01211098-0121
-
- - - -## Extraction du format - - -```python -# creation du DF -col_names = ['issn', - 'format' - ] -journals_format = pd.DataFrame(columns = col_names) -journals_format -``` - - - - -
- - - - - - - - - - - -
issnformat
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in journals.iterrows(): - # myid = row['journal'] - myissn = row['issn'] - # myissnl = row['issnl'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - myformat = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('format' in x): - myformats = x['format'] - if type(myformats) is list: - myformat = myformats[0].replace('vocabularies/medium#', '') - else : - myformat = myformats.replace('vocabularies/medium#', '') - # journals_format.at[index,'journal'] = myid - journals_format.at[index,'issn'] = myissn - # journals2.at[index,'issnl'] = myissnl - journals_format.at[index,'format'] = myformat - else : - print(row['issn'] + ' - pas trouvé') -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - - - -```python -journals_format -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnformat
01660-9379Print
10031-9007Print
21932-6203Online
32174-8454Print
41098-0121Print
.........
9060964-1726Print
9070022-3468Print
9081432-2064Online
9090960-1481Print
9100161-7567Print
-

911 rows × 2 columns

-
- - - - -```python -# test -journals_format.loc[journals_format['format'].isnull()] -``` - - - - -
- - - - - - - - - - - -
issnformat
-
- - - - -```python -journals_format['format'].value_counts() -``` - - - - - Print 817 - Online 92 - Other 2 - Name: format, dtype: int64 - - - - -```python -del journals['issn'] -``` - - -```python -issns = pd.merge(issns, journals, on='issnl', how='outer') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
00000-00190000-0019NaN
12150-40080000-0019NaN
20000-00270000-0027NaN
30000-00430000-0043NaN
40000-00510000-0051NaN
............
19959158756-99738756-9973NaN
19959168756-99818756-9981NaN
19959178756-999X8756-999XNaN
1995918NaN2624-8557120.0
1995919NaN0032-1052936.0
-

1995920 rows × 3 columns

-
- - - - -```python -# tester les lignes sans issn -issns.loc[issns['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
1995918NaN2624-8557120.0
1995919NaN0032-1052936.0
-
- - - - -```python -# garder les lilgnes non null -issns = issns.loc[issns['issn'].notna()] -``` - - -```python -# isoler les lignes avec marge -issns2 = issns.loc[issns['journal'].notna()] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
3340001-28150001-2815532.0
3351399-00390001-2815532.0
4930001-48420001-4842498.0
4941520-48980001-4842498.0
5050001-49660001-4966789.0
............
19213522470-00452470-0045533.0
19213532470-00532470-0045533.0
19257402475-99532475-9953608.0
19518542504-44272504-4427994.0
19518552504-44352504-4427994.0
-

1760 rows × 3 columns

-
- - - - -```python -# ajout du format par ISSN -issns2 = pd.merge(issns2, journals_format, on='issn', how='outer') -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0Print
11399-00390001-2815532.0NaN
20001-48420001-4842498.0Print
31520-48980001-4842498.0NaN
40001-49660001-4966789.0Print
...............
17582504-44272504-4427994.0Print
17592504-44352504-4427994.0NaN
17602624-8557NaNNaNOnline
17612469-9926NaNNaNPrint
17621529-4242NaNNaNOnline
-

1763 rows × 4 columns

-
- - - - -```python -# isoler les lignes avec marge -issns2 = issns2.loc[issns2['journal'].notna()] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0Print
11399-00390001-2815532.0NaN
20001-48420001-4842498.0Print
31520-48980001-4842498.0NaN
40001-49660001-4966789.0Print
...............
17552470-00452470-0045533.0Other
17562470-00532470-0045533.0NaN
17572475-99532475-9953608.0Online
17582504-44272504-4427994.0Print
17592504-44352504-4427994.0NaN
-

1760 rows × 4 columns

-
- - - - -```python -issns2['format'] = issns2['format'].str.upper() -issns2['format'] = issns2['format'].str.replace('ONLINE', 'ELECTRONIC') -# DigitalCarrier -issns2['format'] = issns2['format'].str.replace('DIGITALCARRIER', 'ELECTRONIC') -issns2 -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """Entry point for launching an IPython kernel. - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - after removing the cwd from sys.path. - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0PRINT
11399-00390001-2815532.0NaN
20001-48420001-4842498.0PRINT
31520-48980001-4842498.0NaN
40001-49660001-4966789.0PRINT
...............
17552470-00452470-0045533.0OTHER
17562470-00532470-0045533.0NaN
17572475-99532475-9953608.0ELECTRONIC
17582504-44272504-4427994.0PRINT
17592504-44352504-4427994.0NaN
-

1760 rows × 4 columns

-
- - - - -```python -issns2['format'].value_counts() -``` - - - - - PRINT 816 - ELECTRONIC 90 - OTHER 2 - Name: format, dtype: int64 - - - - -```python -# tester les lignes sans issn -issns2.loc[issns2['format'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
11399-00390001-2815532.0NaN
31520-48980001-4842498.0NaN
51520-85240001-4966789.0NaN
61520-90240001-4966789.0NaN
80942-09400001-6268166.0NaN
...............
17502469-99342469-9926870.0NaN
17522469-99692469-995041.0NaN
17542470-00292470-001080.0NaN
17562470-00532470-0045533.0NaN
17592504-44352504-4427994.0NaN
-

852 rows × 4 columns

-
- - - - -```python -# attribution de l'id du type -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns2['issn_type'] = issns2['format'] -issns2['issn_type'] = issns2['issn_type'].str.replace('PRINT', '1') -issns2['issn_type'] = issns2['issn_type'].str.replace('ELECTRONIC', '2') -issns2['issn_type'] = issns2['issn_type'].str.replace('OTHER', '3') -issns2['issn_type'] = issns2['issn_type'].fillna(3) -issns2 -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:5: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """ - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:7: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - import sys - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:8: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. 
- Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:9: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - if __name__ == '__main__': - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_type
00001-28150001-2815532.0PRINT1
11399-00390001-2815532.0NaN3
20001-48420001-4842498.0PRINT1
31520-48980001-4842498.0NaN3
40001-49660001-4966789.0PRINT1
..................
17552470-00452470-0045533.0OTHER3
17562470-00532470-0045533.0NaN3
17572475-99532475-9953608.0ELECTRONIC2
17582504-44272504-4427994.0PRINT1
17592504-44352504-4427994.0NaN3
-

1760 rows × 5 columns

-
- - - - -```python -# convertir journal en int -issns2['journal'] = issns2['journal'].astype(int) -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - -```python -# convertir l'index en id -issns2 = issns2.reset_index() -issns2['id'] = issns2['index'] + 1 -del issns2['index'] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -issns2['issn_type'] = issns2['issn_type'].astype(int) -``` - - -```python -# supprimer les doublons par ISSN -issns2 = issns2.drop_duplicates(subset='issn') -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -# export csv -issns2.to_csv('sample/issn_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -issns2.to_excel('sample/issn_brut.xlsx', index=False) -``` - - -```python -# export CSV des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_csv('sample/issn_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_excel('sample/issn_ids.xlsx', index=False) -``` diff --git a/import_scripts/05_oacct_issns.py b/import_scripts/05_oacct_issns.py deleted file mode 100644 index af282efb..00000000 --- a/import_scripts/05_oacct_issns.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table ISSNs - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os - - -# In[2]: - - -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns - - -# In[3]: - - -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns - - -# In[4]: - - -journals = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', sep='\t', usecols=(['id', 'issn', 'issnl'])) -journals - - -# In[5]: - - -# renomer les colonnes id -journals = journals.rename(columns = {'id' : 'journal'}) -journals - - -# In[6]: - - -# test journals sans issn -journals.loc[journals['issn'].isna()] - - -# In[7]: - - -journals.loc[journals['journal'] == 5] - - -# ## Extraction du format - -# In[8]: - - -# creation du DF -col_names = ['issn', - 'format' - ] -journals_format = pd.DataFrame(columns = col_names) -journals_format - - -# In[9]: - - -# extraction des informations à partir des données ISSN.org -for index, row in journals.iterrows(): - # myid = row['journal'] - myissn = row['issn'] - # myissnl = row['issnl'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - myformat = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('format' in x): - myformats = x['format'] - if type(myformats) is list: - myformat = myformats[0].replace('vocabularies/medium#', '') - else : - myformat = myformats.replace('vocabularies/medium#', '') - # journals_format.at[index,'journal'] = myid - journals_format.at[index,'issn'] = myissn - # journals2.at[index,'issnl'] = myissnl - 
journals_format.at[index,'format'] = myformat - else : - print(row['issn'] + ' - pas trouvé') - - -# In[10]: - - -journals_format - - -# In[11]: - - -# test -journals_format.loc[journals_format['format'].isnull()] - - -# In[12]: - - -journals_format['format'].value_counts() - - -# In[13]: - - -del journals['issn'] - - -# In[14]: - - -issns = pd.merge(issns, journals, on='issnl', how='outer') -issns - - -# In[15]: - - -# tester les lignes sans issn -issns.loc[issns['issn'].isna()] - - -# In[16]: - - -# garder les lilgnes non null -issns = issns.loc[issns['issn'].notna()] - - -# In[17]: - - -# isoler les lignes avec marge -issns2 = issns.loc[issns['journal'].notna()] -issns2 - - -# In[18]: - - -# ajout du format par ISSN -issns2 = pd.merge(issns2, journals_format, on='issn', how='outer') -issns2 - - -# In[19]: - - -# isoler les lignes avec marge -issns2 = issns2.loc[issns2['journal'].notna()] -issns2 - - -# In[20]: - - -issns2['format'] = issns2['format'].str.upper() -issns2['format'] = issns2['format'].str.replace('ONLINE', 'ELECTRONIC') -# DigitalCarrier -issns2['format'] = issns2['format'].str.replace('DIGITALCARRIER', 'ELECTRONIC') -issns2 - - -# In[21]: - - -issns2['format'].value_counts() - - -# In[22]: - - -# tester les lignes sans issn -issns2.loc[issns2['format'].isnull()] - - -# In[23]: - - -# attribution de l'id du type -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns2['issn_type'] = issns2['format'] -issns2['issn_type'] = issns2['issn_type'].str.replace('PRINT', '1') -issns2['issn_type'] = issns2['issn_type'].str.replace('ELECTRONIC', '2') -issns2['issn_type'] = issns2['issn_type'].str.replace('OTHER', '3') -issns2['issn_type'] = issns2['issn_type'].fillna(3) -issns2 - - -# In[24]: - - -# convertir journal en int -issns2['journal'] = issns2['journal'].astype(int) - - -# In[25]: - - -# convertir l'index en id -issns2 = issns2.reset_index() -issns2['id'] = issns2['index'] + 1 -del issns2['index'] -issns2 - - -# In[26]: - - -issns2['issn_type'] = 
issns2['issn_type'].astype(int) - - -# In[27]: - - -# supprimer les doublons par ISSN -issns2 = issns2.drop_duplicates(subset='issn') -issns2 - - -# In[28]: - - -# export csv -issns2.to_csv('sample/issn_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[29]: - - -# export excel -issns2.to_excel('sample/issn_brut.xlsx', index=False) - - -# In[30]: - - -# export CSV des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_csv('sample/issn_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[31]: - - -# export excel des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_excel('sample/issn_ids.xlsx', index=False) - diff --git a/import_scripts/06_oacct_sherpa.md b/import_scripts/06_oacct_sherpa.md deleted file mode 100644 index b7077461..00000000 --- a/import_scripts/06_oacct_sherpa.md +++ /dev/null @@ -1,9819 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données de Sherpa/Romeo obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Données de Sherpa/Romeo - -### Exemple - -https://v2.sherpa.ac.uk/cgi/retrieve_by_id?item-type=publication&api-key=EEE6F146-678E-11EB-9C3A-202F3DE2659A&format=Json&identifier=17601 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -``` - -## Table publisher_sherpa - - -```python -# creation du DF -col_names = ['journal', - 'publisher_id', - 'name', - 'country', - 'type', - 'url' - ] -publisher_sherpa = pd.DataFrame(columns = col_names) -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
-
- - - -## Table sherpa match issn - - -```python -# creation du DF -col_names = ['issn', - 'sherpa_match', - ] -sherpa_match_issn = pd.DataFrame(columns = col_names) -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - -
issnsherpa_match
-
- - - -## Table sherpa issns - - -```python -# creation du DF -col_names = ['issn', - 'type', - ] -sherpa_issn = pd.DataFrame(columns = col_names) -sherpa_issn -``` - - - - -
- - - - - - - - - - - -
issntype
-
- - - -## Table sherpa journals - - -```python -# creation du DF -col_names = ['journal', - 'title', - 'url', - ] -sherpa_journal = pd.DataFrame(columns = col_names) -sherpa_journal -``` - - - - -
- - - - - - - - - - - - -
journaltitleurl
-
- - - -## Import table Journals et ISSN - - -```python -journal = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublisher
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.016
...........................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN147
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217
-

911 rows × 24 columns

-
- - - - -```python -issn = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', header=0, sep='\t') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -issn_ids = pd.read_csv('sample/issn_ids.tsv', encoding='utf-8', header=0, sep='\t') -issn_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnljournal
010001-28150001-2815532
121399-00390001-2815532
230001-48420001-4842498
341520-48980001-4842498
450001-49660001-4966789
...............
175517562470-00452470-0045533
175617572470-00532470-0045533
175717582475-99532475-9953608
175817592504-44272504-4427994
175917602504-44352504-4427994
-

1760 rows × 4 columns

-
- - - -## Extraction de Sherpa Romeo - - -```python -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # if (((index/10) - int(index/10)) == 0) : - # print(index) - # initialisation des variables à extraire - publisher_id = np.nan - publisher_name = '' - publisher_country = '' - publisher_type = '' - publisher_url = '' - # boucle des fichiers json - # test d'existance du fichier - # print(row['issn']) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - if (len(data['items']) > 0): - publisher_id = data['items'][0]['publishers'][0]['publisher']['id'] - if ('country' in data['items'][0]['publishers'][0]['publisher']): - publisher_country = data['items'][0]['publishers'][0]['publisher']['country'] - if ('relationship_type' in data['items'][0]['publishers'][0]): - publisher_type = data['items'][0]['publishers'][0]['relationship_type'] - if ('url' in data['items'][0]['publishers'][0]['publisher']): - publisher_url = data['items'][0]['publishers'][0]['publisher']['url'] - if ('name' in data['items'][0]['publishers'][0]['publisher']['name'][0]): - publisher_name = data['items'][0]['publishers'][0]['publisher']['name'][0]['name'] - sherpa_match = 'OK' - publisher_sherpa = publisher_sherpa.append({'journal' : journal_id, 'publisher_id' : publisher_id, - 'name' : publisher_name, 'country' : publisher_country, - 'type' : publisher_type, 'url' : publisher_url}, ignore_index=True) - else : - print(row['issn'] + ' - trouvé mais vide') - sherpa_match = 'empty' - else : - print(row['issn'] + ' - pas trouvé') - sherpa_match = 'missing' - sherpa_match_issn = sherpa_match_issn.append({'issn' : row['issn'], 'sherpa_match' : sherpa_match}, ignore_index=True) -``` - - 1399-0039 - pas trouvé - 1520-8524 - trouvé mais vide - 1520-9024 - pas trouvé - 1468-2834 - pas 
trouvé - 1551-2916 - pas trouvé - 1943-2984 - pas trouvé - 1555-7162 - trouvé mais vide - 2163-5773 - pas trouvé - 1873-4324 - trouvé mais vide - 1526-7598 - pas trouvé - 1673-3134 - pas trouvé - 1777-5884 - pas trouvé - 1528-1140 - pas trouvé - 1468-2060 - pas trouvé - 1552-6259 - pas trouvé - 0003-6935 - trouvé mais vide - 1520-8842 - pas trouvé - 0003-9926 - trouvé mais vide - 1538-3679 - pas trouvé - 0003-9942 - trouvé mais vide - 1538-3687 - pas trouvé - 1529-0131 - pas trouvé - 1090-2104 - trouvé mais vide - 1943-295X - pas trouvé - 1878-2434 - pas trouvé - 1873-2402 - trouvé mais vide - 1872-6240 - trouvé mais vide - 1365-2133 - pas trouvé - 0007-4403 - trouvé mais vide - 1968-3766 - pas trouvé - 0008-042X - trouvé mais vide - 2104-3329 - pas trouvé - 2268-7963 - pas trouvé - 1873-3948 - trouvé mais vide - 1873-4405 - trouvé mais vide - 1872-6836 - trouvé mais vide - 1873-4448 - trouvé mais vide - 1524-4571 - trouvé mais vide - 1873-7838 - trouvé mais vide - 1879-2944 - trouvé mais vide - 1873-3840 - trouvé mais vide - 1973-8102 - trouvé mais vide - 0011-1600 - trouvé mais vide - 1968-3901 - pas trouvé - 1879-2235 - trouvé mais vide - 1095-564X - trouvé mais vide - 1931-3543 - pas trouvé - 1385-013X - trouvé mais vide - 1873-3859 - trouvé mais vide - 1873-7315 - trouvé mais vide - 0013-8584 - trouvé mais vide - 2309-4672 - pas trouvé - 0014-2239 - trouvé mais vide - 2272-9011 - pas trouvé - 0945-5795 - pas trouvé - 1432-1033 - pas trouvé - 1365-2362 - pas trouvé - 1090-2422 - trouvé mais vide - 1026-7484 - trouvé mais vide - 1528-0012 - trouvé mais vide - 1872-9533 - trouvé mais vide - 0016-9161 - trouvé mais vide - 2297-7953 - pas trouvé - 1879-2189 - trouvé mais vide - 0018-0238 - trouvé mais vide - 2297-1971 - pas trouvé - 2334-3303 - pas trouvé - 1070-6313 - pas trouvé - 1873-3255 - trouvé mais vide - 1097-0215 - pas trouvé - 1879-2146 - trouvé mais vide - 0021-8170 - trouvé mais vide - 2114-6292 - pas trouvé - 1090-266X - trouvé mais vide - 1520-8850 - 
trouvé mais vide - 1879-1484 - trouvé mais vide - 1067-8832 - pas trouvé - 1067-8816 - pas trouvé - 1873-2380 - trouvé mais vide - 1090-2694 - trouvé mais vide - 1520-9032 - pas trouvé - 1873-3778 - trouvé mais vide - 1945-7197 - pas trouvé - 0021-9797 - trouvé mais vide - 1090-2716 - trouvé mais vide - 1873-5002 - pas trouvé - 0022-0728 - trouvé mais vide - 1879-2707 - trouvé mais vide - 1872-7883 - trouvé mais vide - 1527-2427 - trouvé mais vide - 1089-8638 - trouvé mais vide - 1873-4820 - trouvé mais vide - 1872-8561 - trouvé mais vide - 1531-5037 - trouvé mais vide - 1085-8695 - pas trouvé - 1097-6833 - pas trouvé - 1879-2553 - trouvé mais vide - 1097-6841 - pas trouvé - 2050-5639 - pas trouvé - 1873-4782 - trouvé mais vide - 1878-5883 - trouvé mais vide - 1085-8687 - pas trouvé - 1097-685X - pas trouvé - 1070-6321 - pas trouvé - 1091-756X - pas trouvé - 1939-5590 - trouvé mais vide - 1939-5604 - pas trouvé - 1873-1856 - trouvé mais vide - 1872-6143 - pas trouvé - 0025-6749 - trouvé mais vide - 1423-0356 - pas trouvé - 0026-4598 - pas trouvé - 1432-1874 - pas trouvé - 0027-4054 - trouvé mais vide - 1873-3514 - trouvé mais vide - 1873-0310 - trouvé mais vide - 1872-616X - pas trouvé - 1402-4896 - pas trouvé - 0031-8965 - trouvé mais vide - 1521-396X - pas trouvé - 1092-0145 - trouvé mais vide - 1873-3700 - pas trouvé - 1532-2548 - pas trouvé - 1527-2400 - trouvé mais vide - 0035-1121 - trouvé mais vide - 1760-7426 - pas trouvé - 0035-1784 - trouvé mais vide - 2297-1254 - pas trouvé - 0035-3655 - trouvé mais vide - 2104-385X - pas trouvé - 0036-7486 - trouvé mais vide - 1424-4004 - trouvé mais vide - 0036-7672 - trouvé mais vide - 0036-7699 - trouvé mais vide - 0036-7893 - trouvé mais vide - 2504-1452 - pas trouvé - 1471-1257 - pas trouvé - 1879-2766 - trouvé mais vide - 1879-2405 - trouvé mais vide - 1879-2758 - trouvé mais vide - 1464-5416 - pas trouvé - 1873-3581 - pas trouvé - 1664-2864 - pas trouvé - 1879-2731 - pas trouvé - 1534-6080 - trouvé mais vide - 
1873-2623 - pas trouvé - 1096-0341 - trouvé mais vide - 1878-5646 - trouvé mais vide - 1879-2448 - pas trouvé - 1879-1298 - trouvé mais vide - 1879-2138 - trouvé mais vide - 0046-2497 - trouvé mais vide - 1776-2936 - pas trouvé - 1873-7625 - trouvé mais vide - 1879-2472 - pas trouvé - 2214-8019 - trouvé mais vide - 0065-7727 - trouvé mais vide - 1070-6283 - pas trouvé - 0066-6653 - trouvé mais vide - 0072-0585 - trouvé mais vide - 1079-2376 - pas trouvé - 1557-7988 - trouvé mais vide - 0081-1254 - trouvé mais vide - 1523-1755 - pas trouvé - 1085-8725 - pas trouvé - 1097-6825 - trouvé mais vide - 1096-0260 - pas trouvé - 1522-8541 - pas trouvé - 1551-7616 - pas trouvé - 1935-0465 - pas trouvé - 1070-633X - pas trouvé - 1873-4375 - trouvé mais vide - 1070-6291 - pas trouvé - 0108-2701 - trouvé mais vide - 1600-5759 - pas trouvé - 1879-0097 - pas trouvé - 1879-2081 - pas trouvé - 1873-7323 - trouvé mais vide - 1879-3452 - trouvé mais vide - 1878-5905 - trouvé mais vide - 1532-1991 - pas trouvé - 1071-2763 - pas trouvé - 1071-8842 - pas trouvé - 2156-2202 - pas trouvé - 1081-1281 - pas trouvé - 1873-7528 - trouvé mais vide - 1773-0406 - trouvé mais vide - 0151-0193 - trouvé mais vide - 2101-0218 - trouvé mais vide - 0161-7567 - trouvé mais vide - 2160-9292 - trouvé mais vide - 1095-3795 - trouvé mais vide - 1872-678X - trouvé mais vide - 1573-2517 - pas trouvé - 1872-7557 - trouvé mais vide - 1872-7123 - trouvé mais vide - 1872-7441 - trouvé mais vide - 1872-7999 - pas trouvé - 1879-1514 - pas trouvé - 1874-1754 - trouvé mais vide - 1872-7697 - trouvé mais vide - 1873-5568 - trouvé mais vide - 1872-7352 - pas trouvé - 1872-9584 - trouvé mais vide - 1600-0641 - trouvé mais vide - 1872-9576 - trouvé mais vide - 1873-5460 - pas trouvé - 1873-5584 - trouvé mais vide - 1872-695X - pas trouvé - 1432-0827 - pas trouvé - 1432-1262 - pas trouvé - 0181-5512 - trouvé mais vide - 1773-0597 - pas trouvé - 1879-2367 - trouvé mais vide - 1532-2939 - trouvé mais vide - 1527-3296 - pas 
trouvé - 1558-1497 - trouvé mais vide - 0221-5918 - trouvé mais vide - 0248-8663 - trouvé mais vide - 1768-3122 - trouvé mais vide - 0252-1881 - trouvé mais vide - 0252-2969 - trouvé mais vide - 1661-5468 - pas trouvé - 0254-945X - trouvé mais vide - 1662-9760 - pas trouvé - 0255-9005 - trouvé mais vide - 0258-6800 - trouvé mais vide - 1432-0819 - pas trouvé - 0259-6199 - trouvé mais vide - 1661-3171 - trouvé mais vide - 1532-1983 - pas trouvé - 1873-2518 - trouvé mais vide - 1365-2346 - pas trouvé - 1476-5365 - pas trouvé - 1067-8824 - pas trouvé - 0271-4302 - trouvé mais vide - 2158-1525 - pas trouvé - 1536-4801 - pas trouvé - 1873-457X - pas trouvé - 1531-5053 - pas trouvé - 1470-8752 - pas trouvé - 1879-176X - pas trouvé - 1873-4421 - pas trouvé - 1432-1998 - pas trouvé - 1873-6246 - pas trouvé - 1873-6777 - pas trouvé - 1879-3533 - trouvé mais vide - 1872-8057 - trouvé mais vide - 1872-7972 - trouvé mais vide - 1879-2723 - trouvé mais vide - 1879-2774 - pas trouvé - 1873-4766 - trouvé mais vide - 1362-4954 - pas trouvé - 1365-2842 - pas trouvé - 1361-6447 - trouvé mais vide - 1872-9118 - trouvé mais vide - 1873-7544 - trouvé mais vide - 1873-3360 - pas trouvé - 1873-2100 - pas trouvé - 1872-9657 - trouvé mais vide - 1499-2752 - pas trouvé - 2567-689X - trouvé mais vide - 1432-1238 - pas trouvé - 1873-684X - trouvé mais vide - 1879-355X - trouvé mais vide - 1879-3487 - trouvé mais vide - 1873-6785 - trouvé mais vide - 1546-3141 - pas trouvé - 0362-1340 - trouvé mais vide - 1523-2867 - pas trouvé - 1558-1160 - trouvé mais vide - 1432-2323 - pas trouvé - 0365-7116 - trouvé mais vide - 1873-2526 - pas trouvé - 0368-4466 - trouvé mais vide - 1588-2926 - pas trouvé - 0369-3392 - trouvé mais vide - 1873-2445 - trouvé mais vide - 0373-2525 - trouvé mais vide - 0373-2967 - trouvé mais vide - 2235-3658 - pas trouvé - 0373-6156 - trouvé mais vide - 2391-1336 - pas trouvé - 0374-4256 - trouvé mais vide - 0375-1457 - trouvé mais vide - 2419-8196 - pas trouvé - 1873-2429 - 
trouvé mais vide - 1872-6097 - pas trouvé - 1872-6860 - trouvé mais vide - 1574-6968 - pas trouvé - 1879-0038 - trouvé mais vide - 1873-3476 - trouvé mais vide - 1873-2755 - trouvé mais vide - 1872-6178 - trouvé mais vide - 1873-2046 - trouvé mais vide - 1872-6283 - trouvé mais vide - 0398-3412 - trouvé mais vide - 2297-5810 - pas trouvé - 0409-8757 - trouvé mais vide - 1461-7412 - pas trouvé - 1873-1562 - trouvé mais vide - 1089-4918 - trouvé mais vide - 1538-4500 - pas trouvé - 0570-0833 - trouvé mais vide - 0583-8401 - trouvé mais vide - 1872-7727 - trouvé mais vide - 1873-264X - trouvé mais vide - 1527-7755 - pas trouvé - 1520-8559 - trouvé mais vide - 1558-3597 - trouvé mais vide - 1873-5134 - pas trouvé - 1096-3677 - pas trouvé - 2213-0276 - pas trouvé - 1958-5381 - pas trouvé - 1651-2227 - pas trouvé - 0884-1616 - trouvé mais vide - 1091-8876 - pas trouvé - 1092-8928 - pas trouvé - 1089-8646 - pas trouvé - 0888-8809 - trouvé mais vide - 1944-9917 - trouvé mais vide - 1532-0987 - pas trouvé - 0894-8275 - trouvé mais vide - 1878-5921 - pas trouvé - 1520-636X - pas trouvé - 1399-3038 - pas trouvé - 1873-7196 - trouvé mais vide - 1873-4308 - trouvé mais vide - 1573-2509 - trouvé mais vide - 1879-0658 - trouvé mais vide - 1873-2135 - pas trouvé - 1873-2143 - pas trouvé - 1873-4936 - trouvé mais vide - 1873-4944 - pas trouvé - 1872-793X - trouvé mais vide - 1873-3069 - pas trouvé - 1872-8286 - trouvé mais vide - 1873-3077 - pas trouvé - 1873-4669 - trouvé mais vide - 1873-3883 - trouvé mais vide - 0926-9630 - trouvé mais vide - 1879-8365 - trouvé mais vide - 1879-3398 - trouvé mais vide - 1873-4359 - trouvé mais vide - 1879-0720 - trouvé mais vide - 1769-664X - pas trouvé - 1432-2218 - pas trouvé - 1866-6817 - pas trouvé - 1432-2277 - pas trouvé - 1435-4373 - pas trouvé - 1433-2965 - pas trouvé - 1873-3441 - pas trouvé - 1362-3044 - pas trouvé - 1879-0526 - trouvé mais vide - 1879-0828 - pas trouvé - 1879-0410 - trouvé mais vide - 1873-619X - trouvé mais vide - 
1873-4235 - trouvé mais vide - 1362-511X - pas trouvé - 1879-0429 - trouvé mais vide - 1879-1786 - trouvé mais vide - 1879-0852 - pas trouvé - 1879-0682 - pas trouvé - 1873-2976 - trouvé mais vide - 1464-3405 - trouvé mais vide - 1466-1861 - pas trouvé - 1555-3892 - pas trouvé - 1360-0443 - pas trouvé - 1464-3391 - trouvé mais vide - 1879-2359 - pas trouvé - 0992-986X - trouvé mais vide - 2119-4130 - pas trouvé - 0995-3817 - trouvé mais vide - 2219-2840 - pas trouvé - 1010-2248 - trouvé mais vide - 1664-9885 - pas trouvé - 1873-2666 - pas trouvé - 1017-0588 - trouvé mais vide - 1018-7987 - trouvé mais vide - 1019-0406 - trouvé mais vide - 1023-2044 - trouvé mais vide - 1023-9332 - trouvé mais vide - 2235-1884 - pas trouvé - 1560-7917 - pas trouvé - 1026-7530 - pas trouvé - 1607-8489 - pas trouvé - 1127-2236 - pas trouvé - 1938-808X - pas trouvé - 1095-8657 - trouvé mais vide - 1536-3732 - pas trouvé - 1049-5258 - trouvé mais vide - 1538-4446 - pas trouvé - 1095-9572 - trouvé mais vide - 1532-6500 - trouvé mais vide - 1059-1524 - trouvé mais vide - 1095-3787 - trouvé mais vide - 1538-4519 - trouvé mais vide - 1063-6919 - trouvé mais vide - 2332-564X - pas trouvé - 2575-7075 - pas trouvé - 1940-6029 - trouvé mais vide - 1527-2435 - pas trouvé - 1527-2419 - pas trouvé - 1071-1023 - trouvé mais vide - 1520-8567 - pas trouvé - 1090-235X - trouvé mais vide - 1532-2130 - pas trouvé - 1096-0856 - trouvé mais vide - 1538-4489 - pas trouvé - 1155-4339 - trouvé mais vide - 1764-7177 - pas trouvé - 1460-9592 - pas trouvé - 1878-3511 - pas trouvé - 1778-7254 - pas trouvé - 1873-4030 - pas trouvé - 1873-2844 - trouvé mais vide - 1873-5126 - trouvé mais vide - 1873-5606 - pas trouvé - 1873-2453 - trouvé mais vide - 1872-8456 - pas trouvé - 2040-2058 - pas trouvé - 1878-5840 - trouvé mais vide - 1473-6519 - pas trouvé - 1879-0690 - trouvé mais vide - 1466-609X - pas trouvé - 1367-4811 - trouvé mais vide - 1873-4286 - pas trouvé - 1873-3212 - trouvé mais vide - 1873-1759 - pas 
trouvé - 1875-8908 - trouvé mais vide - 1872-8952 - trouvé mais vide - 1873-1902 - trouvé mais vide - 1600-0854 - pas trouvé - 1420-5556 - trouvé mais vide - 1420-7192 - trouvé mais vide - 1662-0879 - pas trouvé - 1422-2019 - trouvé mais vide - 1422-3449 - trouvé mais vide - 1422-5778 - trouvé mais vide - 2504-1436 - pas trouvé - 1423-3967 - trouvé mais vide - 1663-3997 - pas trouvé - 1424-1811 - trouvé mais vide - 2504-1460 - pas trouvé - 1424-4020 - pas trouvé - 1424-7410 - trouvé mais vide - 1424-7755 - trouvé mais vide - 1436-3771 - pas trouvé - 1434-6028 - trouvé mais vide - 1434-6036 - trouvé mais vide - 1439-4456 - pas trouvé - 1449-8979 - pas trouvé - 1873-6416 - trouvé mais vide - 1465-6914 - trouvé mais vide - 1478-6362 - pas trouvé - 1520-6149 - trouvé mais vide - 2379-190X - trouvé mais vide - 1522-1601 - pas trouvé - 1708-8208 - pas trouvé - 1944-7884 - pas trouvé - 1527-6473 - pas trouvé - 1947-3893 - pas trouvé - 1530-1591 - trouvé mais vide - 1558-1101 - pas trouvé - 1860-2002 - pas trouvé - 1552-5279 - pas trouvé - 1557-170X - trouvé mais vide - 1878-5530 - trouvé mais vide - 1878-1519 - trouvé mais vide - 1569-9293 - pas trouvé - 1873-376X - pas trouvé - 1720-8319 - pas trouvé - 1610-0379 - trouvé mais vide - 1610-0387 - pas trouvé - 1778-3569 - trouvé mais vide - 1660-3362 - trouvé mais vide - 1660-9379 - trouvé mais vide - 1660-9603 - trouvé mais vide - 1661-1179 - trouvé mais vide - 1661-2620 - trouvé mais vide - 1661-464X - trouvé mais vide - 1661-4941 - trouvé mais vide - 1661-8165 - pas trouvé - 1662-551X - pas trouvé - 1662-5536 - trouvé mais vide - 1662-6001 - trouvé mais vide - 1662-601X - pas trouvé - 1662-8705 - trouvé mais vide - 1777-5477 - trouvé mais vide - 1810-7621 - pas trouvé - 1863-2300 - pas trouvé - 1873-2763 - trouvé mais vide - 1876-7737 - pas trouvé - 1878-8769 - trouvé mais vide - 1939-5175 - trouvé mais vide - 1945-7928 - trouvé mais vide - 1945-7936 - pas trouvé - 1945-8452 - trouvé mais vide - 1992-2655 - trouvé mais 
vide - 2050-7534 - trouvé mais vide - 2101-6275 - pas trouvé - 2161-2129 - pas trouvé - 2160-5033 - trouvé mais vide - 2160-5041 - pas trouvé - 2160-9020 - trouvé mais vide - 2160-9047 - pas trouvé - 2164-3342 - trouvé mais vide - 2174-8454 - trouvé mais vide - 2340-115X - pas trouvé - 2211-3282 - trouvé mais vide - 2264-7228 - trouvé mais vide - 2297-0703 - trouvé mais vide - 2297-6981 - trouvé mais vide - 2297-7007 - pas trouvé - 2352-1791 - trouvé mais vide - 2504-4427 - trouvé mais vide - 2504-4435 - trouvé mais vide - - - -```python -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
24984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
3789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
41663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
12388010American Physical Societyussociety_publisherhttp://www.aps.org/
12398010American Physical Societyussociety_publisherhttp://www.aps.org/
124053310American Physical Societyussociety_publisherhttp://www.aps.org/
124153310American Physical Societyussociety_publisherhttp://www.aps.org/
124260810American Physical Societyussociety_publisherhttp://www.aps.org/
-

1243 rows × 6 columns

-
- - - - -```python -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_match
00001-2815OK
11399-0039missing
20001-4842OK
31520-4898OK
40001-4966OK
.........
17552470-0045OK
17562470-0053OK
17572475-9953OK
17582504-4427empty
17592504-4435empty
-

1760 rows × 2 columns

-
- - - - -```python -# dedup -publisher_sherpa_dedup = publisher_sherpa.drop_duplicates() -publisher_sherpa_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
3789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
41663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
68073291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
123587010American Physical Societyussociety_publisherhttp://www.aps.org/
12364110American Physical Societyussociety_publisherhttp://www.aps.org/
12388010American Physical Societyussociety_publisherhttp://www.aps.org/
124053310American Physical Societyussociety_publisherhttp://www.aps.org/
124260810American Physical Societyussociety_publisherhttp://www.aps.org/
-

808 rows × 6 columns

-
- - - - -```python -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_match
00001-2815OK
11399-0039missing
20001-4842OK
31520-4898OK
40001-4966OK
.........
17552470-0045OK
17562470-0053OK
17572475-9953OK
17582504-4427empty
17592504-4435empty
-

1760 rows × 2 columns

-
- - - - -```python -# ajout du issnl et du titre -sherpa_match_issn = pd.merge(sherpa_match_issn, issn_ids, on='issn', how='left') -sherpa_match_issn = pd.merge(sherpa_match_issn, journal[['issnl', 'title']], on='issnl', how='left') -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_matchidissnljournaltitle
00001-2815OK10001-2815532Tissue antigens
11399-0039missing20001-2815532Tissue antigens
20001-4842OK30001-4842498Accounts of chemical research
31520-4898OK40001-4842498Accounts of chemical research
40001-4966OK50001-4966789The Journal of the Acoustical Society of America
.....................
17552470-0045OK17562470-0045533Physical review. E (Print)
17562470-0053OK17572470-0045533Physical review. E (Print)
17572475-9953OK17582475-9953608Physical review materials
17582504-4427empty17592504-4427994GG@G (Print)
17592504-4435empty17602504-4427994GG@G (Print)
-

1760 rows × 6 columns

-
- - - - -```python -sherpa_match_results = sherpa_match_issn[['id', 'issnl', 'sherpa_match']].groupby(['issnl', 'sherpa_match']).count() -sherpa_match_results -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id
issnlsherpa_match
0001-2815OK1
missing1
0001-4842OK2
0001-4966OK1
empty1
.........
2469-9950OK2
2470-0010OK2
2470-0045OK2
2475-9953OK1
2504-4427empty2
-

1302 rows × 1 columns

-
- - - - -```python -sherpa_match_results = sherpa_match_results.reset_index() -sherpa_match_results -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlsherpa_matchid
00001-2815OK1
10001-2815missing1
20001-4842OK2
30001-4966OK1
40001-4966empty1
............
12972469-9950OK2
12982470-0010OK2
12992470-0045OK2
13002475-9953OK1
13012504-4427empty2
-

1302 rows × 3 columns

-
- - - - -```python -sherpa_match_results_ok = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'OK'] -issn_ids_issnl = issn_ids[['issnl', 'journal']].drop_duplicates(subset='issnl') -issn_ids_issnl = pd.merge(issn_ids_issnl, sherpa_match_results_ok, on='issnl', how='left') -issn_ids_issnl = pd.merge(issn_ids_issnl, journal[['issnl', 'title']], on='issnl', how='left') -issn_ids_issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournalsherpa_matchidtitle
00001-2815532OK1.0Tissue antigens
10001-4842498OK2.0Accounts of chemical research
20001-4966789OK1.0The Journal of the Acoustical Society of America
30001-6268166OK2.0Acta neurochirurgica
40001-6322807OK2.0Acta neuropathologica
..................
9042469-995041OK2.0Physical review. B
9052470-001080OK2.0Physical review. D
9062470-0045533OK2.0Physical review. E (Print)
9072475-9953608OK1.0Physical review materials
9082504-4427994NaNNaNGG@G (Print)
-

909 rows × 5 columns

-
- - - - -```python -journals_not_sherpa = issn_ids_issnl.loc[issn_ids_issnl['sherpa_match'].isna()] -journals_not_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournalsherpa_matchidtitle
240003-6935398NaNNaNApplied optics
270003-9926605NaNNaNArchives of internal medicine (1960)
280003-9942974NaNNaNArchives of neurology (Chicago)
470007-4403885NaNNaNBulletin de psychologie
480008-042X180NaNNaNCahiers pédagogiques (Revue)
..................
8892264-7228503NaNNaNDistances et médiations des savoirs
8922297-0703989NaNNaNSchweizer Krebs-Bulletin
8932297-6981618NaNNaNSwiss archives of neurology, psychiatry and ps...
8982352-1791639NaNNaNNuclear materials and energy
9082504-4427994NaNNaNGG@G (Print)
-

101 rows × 5 columns

-
- - - - -```python -sherpa_match_results_empty = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'empty'] -sherpa_match_results_missing = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'missing'] -del journals_not_sherpa['sherpa_match'] -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_empty, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_missing, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournaltitlesherpa_match_xsherpa_match_y
00003-6935398Applied opticsemptyNaN
10003-9926605Archives of internal medicine (1960)emptymissing
20003-9942974Archives of neurology (Chicago)emptymissing
30007-4403885Bulletin de psychologieemptymissing
40008-042X180Cahiers pédagogiques (Revue)emptymissing
..................
962264-7228503Distances et médiations des savoirsemptyNaN
972297-0703989Schweizer Krebs-BulletinemptyNaN
982297-6981618Swiss archives of neurology, psychiatry and ps...emptymissing
992352-1791639Nuclear materials and energyemptyNaN
1002504-4427994GG@G (Print)emptyNaN
-

101 rows × 5 columns

-
- - - - -```python -# extraction des informations des journaux à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - title = np.nan - url = np.nan - if (len(data['items']) > 0): - if ('url' in data['items'][0]): - url = data['items'][0]['url'] - if ('title' in data['items'][0]['title'][0]): - title = data['items'][0]['title'][0]['title'] - sherpa_journal = sherpa_journal.append({'journal' : journal_id, 'title' : title, 'url' : url}, ignore_index=True) -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - 1010 - 1020 - 1030 - 1040 - 1050 - 1060 - 1070 - 1080 - 1090 - 1100 - 1110 - 1120 - 1130 - 1140 - 1150 - 1160 - 1170 - 1180 - 1190 - 1200 - 1210 - 1220 - 1230 - 1240 - 1250 - 1260 - 1270 - 1280 - 1290 - 1300 - 1310 - 1320 - 1330 - 1340 - 1350 - 1360 - 1370 - 1380 - 1390 - 1400 - 1410 - 1420 - 1430 - 1440 - 1450 - 1460 - 1470 - 1480 - 1490 - 1500 - 1510 - 1520 - 1530 - 1540 - 1550 - 1560 - 1570 - 1580 - 1590 - 1600 - 1610 - 1620 - 1630 - 1640 - 1650 - 1660 - 1670 - 1680 - 1690 - 1700 - 1710 - 1720 - 1730 - 1740 - 1750 - - - -```python 
-sherpa_journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journaltitleurl
0532Tissue Antigenshttp://onlinelibrary.wiley.com/journal/10.1111...
1498Accounts of Chemical Researchhttp://pubs.acs.org/journal/achre4
2498Accounts of Chemical Researchhttp://pubs.acs.org/journal/achre4
3789The Journal of the Acoustical Society of Americahttp://asa.scitation.org/journal/jas
4166Acta Neurochirurgicahttp://link.springer.com/journal/701
............
123880Physical Review Dhttp://prd.aps.org/
123980Physical Review Dhttp://prd.aps.org/
1240533Physical Review Ehttp://journals.aps.org/pre/abstract/10.1103/P...
1241533Physical Review Ehttp://journals.aps.org/pre/abstract/10.1103/P...
1242608Physical Review Materialshttp://journals.aps.org/prmaterials/
-

1243 rows × 3 columns

-
- - - - -```python -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - myissn = np.nan - mytype = np.nan - data = json.load(f) - if (len(data['items']) > 0): - if ('issns' in data['items'][0]): - issns = data['items'][0]['issns'] - for i in issns: - if ('issn' in i): - myissn = i['issn'] - if ('type' in i): - mytype = i['type'] - sherpa_issn = sherpa_issn.append({'issn' : myissn, 'type' : mytype}, ignore_index=True) -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - 1010 - 1020 - 1030 - 1040 - 1050 - 1060 - 1070 - 1080 - 1090 - 1100 - 1110 - 1120 - 1130 - 1140 - 1150 - 1160 - 1170 - 1180 - 1190 - 1200 - 1210 - 1220 - 1230 - 1240 - 1250 - 1260 - 1270 - 1280 - 1290 - 1300 - 1310 - 1320 - 1330 - 1340 - 1350 - 1360 - 1370 - 1380 - 1390 - 1400 - 1410 - 1420 - 1430 - 1440 - 1450 - 1460 - 1470 - 1480 - 1490 - 1500 - 1510 - 1520 - 1530 - 1540 - 1550 - 1560 - 1570 - 1580 - 1590 - 1600 - 1610 - 1620 - 1630 - 1640 - 1650 - 1660 - 1670 - 1680 - 1690 - 1700 - 1710 - 1720 - 1730 - 1740 - 1750 - - - -```python -sherpa_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntype
00001-2815print
11399-0039electronic
20001-4842print
31520-4898electronic
40001-4842print
.........
21962470-0045print
21972470-0053electronic
21982470-0045print
21992470-0053electronic
22002475-9953electronic
-

2201 rows × 2 columns

-
- - - - -```python -# dedup -sherpa_issn = sherpa_issn.drop_duplicates() -sherpa_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntype
00001-2815print
11399-0039electronic
20001-4842print
31520-4898electronic
60001-4966print
.........
21922470-0010print
21932470-0029electronic
21962470-0045print
21972470-0053electronic
22002475-9953electronic
-

1333 rows × 2 columns

-
- - - - -```python -# completer le fichier des issns avec les types de sherpa -issn2 = pd.merge(issn, sherpa_issn, on='issn', how='left') -issn2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11print
11399-00390001-2815532NaN32electronic
20001-48420001-4842498PRINT13print
31520-48980001-4842498NaN34electronic
40001-49660001-4966789PRINT15print
........................
17552470-00452470-0045533OTHER31756print
17562470-00532470-0045533NaN31757electronic
17572475-99532475-9953608ELECTRONIC21758electronic
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -# exports csv -publisher_sherpa_dedup.to_csv('sample/publisher_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_match_issn.to_csv('sample/sherpa_match_issn.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_journal.to_csv('sample/sherpa_journal.tsv', sep='\t', encoding='utf-8', index=False) -issn2.to_csv('sample/issn_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -journals_not_sherpa.to_csv('sample/journals_not_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# exports excel -publisher_sherpa_dedup.to_excel('sample/publisher_sherpa.xlsx', index=False) -sherpa_match_issn.to_excel('sample/sherpa_match_issn.xlsx', index=False) -sherpa_journal.to_excel('sample/sherpa_journal.xlsx', index=False) -issn2.to_excel('sample/issn_sherpa.xlsx', index=False) -journals_not_sherpa.to_excel('sample/journals_not_sherpa.xlsx', index=False) -``` - - -```python -# ajout des titres Sherpa a la table des revues -# renommer les colonnes -sherpa_journal = sherpa_journal.rename(columns={'journal' : 'id'}) -journal = pd.merge(journal, sherpa_journal, on='id', how='left') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitle_xstarting_yearend_yearurl_xname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublishertitle_yurl_y
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11NaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/
220031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/
331932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53PLoS ONEhttp://www.plosone.org/
442174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5NaNNaN
.................................................................................
13419980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175Journal of Pediatric Surgeryhttp://www.jpedsurg.org/
13429991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...
13439991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...
134410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119Renewable Energyhttp://www.elsevier.com/wps/product/cws_home/9...
134510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217NaNNaN
-

1346 rows × 26 columns

-
- - - - -```python -# choix du titre et url -journal['url'] = journal['url_y'] -journal.loc[journal['url_y'].isna(), 'url'] = journal['url_x'] -journal['title'] = journal['title_y'] -journal.loc[journal['title_y'].isna(), 'title'] = journal['title_x'] -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitle_xstarting_yearend_yearurl_xname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublishertitle_yurl_yurltitle
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11NaNNaNNaNRevue médicale suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/http://prl.aps.org/Physical Review Letters
220031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/http://prl.aps.org/Physical Review Letters
331932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53PLoS ONEhttp://www.plosone.org/http://www.plosone.org/PLoS ONE
442174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5NaNNaNNaNEU-topías
.......................................................................................
13419980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175Journal of Pediatric Surgeryhttp://www.jpedsurg.org/http://www.jpedsurg.org/Journal of Pediatric Surgery
13429991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...http://www.springerlink.com/content/100451/?p=...Probability Theory and Related Fields
13439991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...http://www.springerlink.com/content/100451/?p=...Probability Theory and Related Fields
134410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119Renewable Energyhttp://www.elsevier.com/wps/product/cws_home/9...http://www.elsevier.com/wps/product/cws_home/9...Renewable Energy
134510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217NaNNaNhttps://www.physiology.org/journal/japplJournal of applied physiology: respiratory, en...
-

1346 rows × 28 columns

-
- - - - -```python -journals_export = journal[['id', 'title', 'name_short_iso_4', 'starting_year', 'end_year', 'url', 'country', 'language', 'oa_status', 'publisher', 'doaj_seal', 'doaj_status', 'lockss', 'portico', 'nlch', 'qoam_av_score']] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtitlename_short_iso_4starting_yearend_yearurlcountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse20059999NaN21513811NaN0.00.00.00.0NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612412NaN0.00.01.00.0NaN
22Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612412NaN0.00.01.00.0NaN
33PLoS ONENaN20069999http://www.plosone.org/236124531.01.01.00.00.04.035714
44EU-topíasEU-topías20119999NaN209124, 138, 402, 29214, 5NaN0.00.00.00.0NaN
...................................................
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/236124175NaN0.00.01.00.0NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312418NaN0.01.01.01.0NaN
1343999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312418NaN0.01.01.01.0NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...2341241119NaN0.00.01.00.0NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl2361241217NaN0.00.00.00.0NaN
-

1346 rows × 16 columns

-
- - - - -```python -# renommage des champs finaux -journals_export = journals_export.rename(columns={'title' : 'name', 'url' : 'website'}) -# remplacement des vides et id à int -journals_export['starting_year'] = journals_export['starting_year'].fillna(0) -journals_export['end_year'] = journals_export['end_year'].fillna(9999) -journals_export['name_short_iso_4'] = journals_export['name_short_iso_4'].fillna('') -journals_export['website'] = journals_export['website'].fillna('') -journals_export['doaj_seal'] = journals_export['doaj_seal'].fillna('0') -journals_export['country'] = journals_export['country'].fillna('999999') -journals_export['language'] = journals_export['language'].fillna('999999') -journals_export['doaj_status'] = journals_export['doaj_status'].astype(int) -journals_export['doaj_seal'] = journals_export['doaj_seal'].astype(int) -journals_export['lockss'] = journals_export['lockss'].astype(int) -journals_export['portico'] = journals_export['portico'].astype(int) -journals_export['nlch'] = journals_export['nlch'].astype(int) -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
22Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
...................................................
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
1343999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

1346 rows × 16 columns

-
- - - - -```python -journals_export = journals_export.drop_duplicates(subset='id') -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -# test journaux sans titre -journals_export.loc[journals_export['name'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
- - - - -```python -# export et suppression des journaux sans titre -# export csv -journals_export.loc[journals_export['name'].isna()].to_csv('sample/sherpa_journals_without_title.tsv', sep='\t', encoding='utf-8', index=False) -# export excel -journals_export.loc[journals_export['name'].isna()].to_excel('sample/sherpa_journals_without_title.xlsx', index=False) -journals_export = journals_export.loc[journals_export['name'].notna()] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Print)')] -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\strings.py:1843: UserWarning: This pattern has match groups. To actually get the groups, use str.extract. - return func(self, *args, **kwargs) - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
8654Helvetica physica acta (Print)Helv. phys. acta (Print)19281999215124, 138, 15114100000NaN
239155Studies in health technology and informatics (...Stud. health technol. inform. (Print)1991999915612419000000NaN
441306Bioethica Forum (Basel. 2008. Print)Bioeth. Forum (Basel, 2008, Print)20089999215138, 124, 151114300000NaN
534373Schweizerische Ärztezeitung (Print)Schweiz. Ärzteztg. (Print)19529999215203, 151, 138117000000NaN
601430The European physical journal. B, Condensed ma...Eur. phys. j., B Cond. matter phys. (Print)19989999761241195, 43001111.25
650467Conference on Lasers and Electro-optics (Print)Conf. Lasers Electro-opt. (Print)20039999http://www.cleoconference.org/23612413900000NaN
850618Swiss archives of neurology, psychiatry and ps...Swiss arch. neurol. psychiatry psychother. (Pr...20169999215151, 124, 13862001000NaN
901660Journal der Deutschen Dermatologischen Gesells...20039999234151, 124128300010NaN
957702IEEE/LEOS International Conference on Optical ...IEEE/LEOS Int. Conf. Opt. MEMS Nanophotonics (...200720uuhttp://ieeexplore.ieee.org/xpl/conhome.jsp?pun...236124128000000NaN
1104814Forumpoenale (Print)Forumpoenale (Print)20089999215151, 203, 138120400000NaN
1182877Gesnerus (Print)Gesnerus (Print)19439999215124, 138, 151, 203114300000NaN
1336994GG@G (Print)GG@G (Print)20009999215124138000000NaN
-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Online)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
1257936Plastic and reconstructive surgery (Online)Plast. reconstr. surg. (Online)19639999http://gateway.ovid.com/ovidweb.cgi?T=JS&MODE=...236124136300000NaN
-
- - - - -```python -# remplacement des mentions " (Print)" et " (Online)" dans les titres -journals_export['name'] = journals_export['name'].str.replace('(Print)', '') -journals_export['name'] = journals_export['name'].str.replace('(Online)', '') -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Print)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Online)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
# ## Table sherpa_policies

# Create the empty DataFrame of the sherpa_policies table with its columns.
col_names = ['journal',
             'issn',
             'sherpa_id',
             'sherpa_uri',
             'open_access_prohibited',
             'additional_oa_fee',
             'article_version',
             'license',
             'embargo',
             'prerequisites',
             'prerequisite_funders',
             'prerequisite_funders_name',
             'prerequisite_funders_fundref',
             'prerequisite_funders_ror',
             'prerequisite_funders_country',
             'prerequisite_funders_url',
             'prerequisite_funders_sherpa_id',
             'prerequisite_subjects',
             'location',
             'locations_ir',
             'locations_not_ir',
             'named_repository',
             'named_academic_social_network',
             'copyright_owner',
             'publisher_deposit',
             'archiving',
             'conditions',
             'public_notes'
            ]
sherpa_policies = pd.DataFrame(columns = col_names)
sherpa_policies
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notes
-
- - - - -```python -# dédoublonage par journal id -issn_dedup = issn.drop_duplicates(subset='journal') -issn_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
20001-48420001-4842498PRINT13
40001-49660001-4966789PRINT15
70001-62680001-6268166PRINT18
90001-63220001-6322807PRINT110
.....................
17512469-99502469-995041PRINT11752
17532470-00102470-001080PRINT11754
17552470-00452470-0045533OTHER31756
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
-

909 rows × 6 columns

-
- - - - -```python -# type de repositories qui provoquent archiving = 1 : -# tous les types : 'academic_social_network', 'any_repository', 'any_website', 'authors_homepage', -# 'funder_designated_location', 'institutional_repository', 'institutional_website', 'named_academic_social_network', -# 'named_repository', 'non_commercial_institutional_repository', 'non_commercial_repository', -# 'non_commercial_social_network', 'non_commercial_subject_repository', 'non_commercial_website', -# 'preprint_repository', 'subject_repository', 'this_journal' -repositories_archiving = ['any_repository', - 'institutional_repository', - 'institutional_website', - 'non_commercial_institutional_repository', - 'non_commercial_repository', - 'any_website', - 'non_commercial_website'] - -# extraction des termes -for index, row in issn_dedup.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - # test d'existance du fichier - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - # initialisation des variables à extraire - sherpa_id = np.nan - sherpa_uri = np.nan - open_access_prohibited = np.nan - location = np.nan - locations_ir = '' - locations_not_ir = '' - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - copyright_owner = np.nan - publisher_deposit = np.nan - archiving = np.nan - conditions = np.nan 
- public_notes = np.nan - if (len(data['items']) > 0): - if ('id' in data['items'][0]): - sherpa_id = data['items'][0]['id'] - # test si l'id est déjà présent - if sherpa_id in sherpa_policies['sherpa_id'] : - print('SKIP ' + str(sherpa_id)) - else : - poilicies = data['items'][0]['publisher_policy'] - for poilicy in poilicies: - # initialisation des variables à extraire - sherpa_uri = np.nan - open_access_prohibited = np.nan - if ('uri' in poilicy): - sherpa_uri = poilicy['uri'] - if ('open_access_prohibited' in poilicy): - open_access_prohibited = poilicy['open_access_prohibited'] - if ('permitted_oa' in poilicy): - poas = poilicy['permitted_oa'] - for poa in poas: - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - locations_ir = '' - locations_not_ir = '' - copyright_owner = np.nan - conditions = np.nan - public_notes = np.nan - if ('additional_oa_fee' in poa): - additional_oa_fee = poa['additional_oa_fee'] - if ('location' in poa): - archiving = 0 - location = '' - mylocations = poa['location']['location'] - mylocations_text = poa['location']['location_phrases'] - if (type(mylocations) is not list): - mylocations = [mylocations] - location = ' ; '.join(mylocations) - for locationi in mylocations: - if locationi in repositories_archiving : - archiving = archiving + 1 - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_ir == '': - locations_ir = locations_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_ir : - locations_ir = 
locations_ir + ' ; ' + locationi_text['phrase'] - else : - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_not_ir == '': - locations_not_ir = locations_not_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_not_ir : - locations_not_ir = locations_not_ir + ' ; ' + locationi_text['phrase'] - # print (archiving) - if archiving > 0: - archiving = True - else : - archiving = False - if ('named_repository' in poa['location']): - if (type(poa['location']['named_repository']) is list): - named_repository = ' ; '.join(poa['location']['named_repository']) - else : - named_repository = poa['location']['named_repository'] - locations_not_ir = locations_not_ir.replace('Named Repository', named_repository) - locations_ir = locations_ir.replace('Named Repository', named_repository) - if ('named_academic_social_network' in poa['location']): - if (type(poa['location']['named_academic_social_network']) is list): - named_academic_social_network = ' ; '.join(poa['location']['named_academic_social_network']) - else : - named_academic_social_network = poa['location']['named_academic_social_network'] - locations_not_ir = locations_not_ir.replace('Named Academic Social Network', named_academic_social_network) - locations_ir = locations_ir.replace('Named Academic Social Network', named_academic_social_network) - if ('embargo' in poa): - # print(poa['embargo']) - embargo_amount = 0 - if ('amount' in poa['embargo']): - embargo_amount = poa['embargo']['amount'] - if ('units' in poa['embargo']): - if (poa['embargo']['units'] == 'months') : - embargo = embargo_amount - elif (poa['embargo']['units'] == 'years') : - embargo = embargo_amount*12 - elif (poa['embargo']['units'] == 'weeks') : - embargo = int(embargo_amount/4) - if (embargo == 0): - embargo = 1 - elif (poa['embargo']['units'] == 'days') : - embargo = int(embargo_amount/30) - if (embargo == 0): - embargo = 1 - else : - embargo = embargo_amount - if 
('prerequisites' in poa): - if 'prerequisites' in poa['prerequisites'] : - if (type(poa['prerequisites']['prerequisites']) is list): - prerequisites = ' ; '.join(poa['prerequisites']['prerequisites']) - else: - prerequisites = poa['prerequisites']['prerequisites'] - if ('prerequisite_funders' in poa['prerequisites']): - prerequisite_funders = True - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - # if (type(poa['prerequisites']['prerequisite_funders']) is list): - # prerequisite_funders = ' ; '.join(poa['prerequisites']['prerequisite_funders']) - # else: - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - if ('prerequisite_subjects' in poa['prerequisites']): - prerequisite_subjects = True - # prerequisite_subjects = poa['prerequisites']['prerequisite_subjects'] - # if (type(poa['prerequisite_subjects']) is list): - # prerequisite_subjects = ' ; '.join(poa['prerequisite_subjects']) - # else: - # prerequisite_subjects = poa['prerequisite_subjects'] - if ('copyright_owner' in poa): - copyright_owner = poa['copyright_owner'] - if ('publisher_deposit' in poa): - publisher_deposit = '' - if (type(poa['publisher_deposit']) is list): - for deposit in poa['publisher_deposit']: - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - else : - deposit = poa['publisher_deposit'] - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + 
deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - # print (publisher_deposit) - if ('conditions' in poa): - if (type(poa['conditions']) is list): - conditions = ' ; '.join(poa['conditions']) - else: - conditions = poa['conditions'] - if ('public_notes' in poa): - if (type(poa['public_notes']) is list): - public_notes = ' ; '.join(poa['public_notes']) - else: - public_notes = poa['public_notes'] - if ('license' in poa): - licenses = poa['license'] - if (type(licenses) is not list): - licenses = [licenses] - else : - licenses = [''] - # avec article version - if ('article_version' in poa): - article_versions = poa['article_version'] - for article_version in article_versions: - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = 
prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 
'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : 
archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - - # sans article version - else : - if (type(licenses) is not list): - licenses = [licenses] - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : 
prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 
'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans permitted_oa - else : - print ('permitted_oa MISSING') - else : - print ('id MISSING') -``` - - 0 - 20 - 40 - 50 - 60 - SKIP 321 - 110 - SKIP 475 - SKIP 476 - 180 - 220 - 250 - 260 - 290 - 300 - 330 - 340 - 360 - 370 - 380 - 420 - permitted_oa MISSING - 430 - permitted_oa MISSING - SKIP 1319 - SKIP 880 - permitted_oa MISSING - 510 - permitted_oa MISSING - 530 - 540 - 550 - 560 - SKIP 1342 - 570 - 590 - SKIP 3082 - SKIP 2465 - SKIP 1682 - SKIP 325 - SKIP 3179 - 670 - 680 - SKIP 1641 - SKIP 1202 - 720 - SKIP 3995 - 730 - SKIP 3475 - SKIP 3490 - 740 - 750 - 760 - SKIP 1383 - SKIP 1357 - permitted_oa MISSING - 830 - 840 - SKIP 1868 
- 850 - SKIP 883 - 880 - 890 - SKIP 1392 - 900 - 910 - SKIP 1377 - 920 - SKIP 3443 - 930 - 940 - SKIP 1123 - SKIP 3581 - SKIP 3558 - SKIP 745 - 980 - 990 - SKIP 11 - SKIP 2499 - 1000 - SKIP 42 - 1010 - 1020 - SKIP 314 - 1030 - 1040 - SKIP 1380 - SKIP 229 - SKIP 1518 - SKIP 5682 - SKIP 4708 - SKIP 1661 - 1130 - SKIP 6585 - 1140 - SKIP 3212 - 1150 - SKIP 335 - SKIP 6774 - 1160 - SKIP 6590 - 1180 - SKIP 1639 - SKIP 5094 - SKIP 1254 - 1200 - SKIP 6325 - SKIP 3539 - SKIP 1444 - SKIP 250 - SKIP 1543 - SKIP 3415 - SKIP 3571 - SKIP 3474 - SKIP 3586 - SKIP 3220 - SKIP 3837 - SKIP 1650 - SKIP 1051 - SKIP 3572 - SKIP 612 - SKIP 6587 - SKIP 3567 - SKIP 1654 - SKIP 4070 - SKIP 1643 - SKIP 6588 - SKIP 1657 - SKIP 1687 - SKIP 1692 - SKIP 1341 - 1320 - SKIP 7150 - SKIP 876 - 1330 - SKIP 7007 - SKIP 7091 - 1340 - 1350 - SKIP 173 - SKIP 4703 - 1360 - SKIP 2515 - 1370 - SKIP 242 - SKIP 3930 - SKIP 2004 - 1400 - 1410 - SKIP 2123 - SKIP 1320 - SKIP 1459 - SKIP 1588 - SKIP 7678 - SKIP 1391 - SKIP 878 - SKIP 138 - SKIP 7632 - SKIP 1644 - SKIP 1637 - SKIP 2207 - SKIP 2428 - SKIP 2432 - 1460 - SKIP 2477 - SKIP 2430 - SKIP 1653 - SKIP 2397 - SKIP 5935 - SKIP 3527 - SKIP 148 - SKIP 7793 - SKIP 4005 - SKIP 7768 - SKIP 3455 - SKIP 1652 - SKIP 3570 - SKIP 7792 - SKIP 3533 - SKIP 6586 - 1520 - SKIP 7787 - SKIP 3355 - 1530 - SKIP 226 - SKIP 1655 - SKIP 7783 - 1540 - SKIP 6582 - 1550 - SKIP 7762 - SKIP 4691 - SKIP 1911 - SKIP 1447 - SKIP 1778 - SKIP 1888 - SKIP 228 - SKIP 7407 - SKIP 7965 - 1590 - 1600 - 1610 - SKIP 821 - SKIP 823 - SKIP 7714 - 1620 - SKIP 172 - SKIP 2624 - SKIP 3654 - SKIP 1659 - SKIP 1656 - SKIP 1658 - SKIP 1393 - 1640 - SKIP 6778 - SKIP 8220 - SKIP 7872 - SKIP 1587 - SKIP 822 - SKIP 1460 - SKIP 6581 - SKIP 3568 - 1670 - SKIP 7509 - SKIP 7799 - SKIP 7765 - 1680 - SKIP 7761 - SKIP 7800 - 1690 - SKIP 1244 - 1710 - SKIP 6222 - 1730 - 1740 - 1750 - - - -```python -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notes
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN
.......................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN
-

8595 rows × 28 columns

-
# Turn the current row index into a 1-based `id` column.
# reset_index() moves the old index into a column named 'index'; pop() removes
# that column again and hands back its values, so `id` ends up as the last
# column and no helper column is left behind.
sherpa_policies = sherpa_policies.reset_index()
sherpa_policies['id'] = sherpa_policies.pop('index') + 1
sherpa_policies
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
- - - - -```python -# export csv -sherpa_policies.to_csv('sample/sherpa_policies_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -sherpa_policies.to_excel('sample/sherpa_policies_brut.xlsx', index=False) -``` - -## Calcul de la catégorie "green" et export final des journaux - - -```python -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
# Keep only the policy rows that allow archiving the published version without
# any funder prerequisite, restricted to the four columns used downstream.
# `archiving` is the boolean set by the extraction loop when at least one
# permitted location belongs to `repositories_archiving`.
sherpa_policies_ir = sherpa_policies.loc[
    (sherpa_policies['prerequisite_funders'].isna())
    & (sherpa_policies['article_version'] == 'published')
    & (sherpa_policies['archiving'] == True),
    ['journal', 'embargo', 'license', 'conditions'],
]
sherpa_policies_ir
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargolicenseconditions
25320cc_byPublished source must be acknowledged
35320cc_by_nc_ndPublished source must be acknowledged
949812cc_byNaN
1049812cc_by_nc_ndNaN
1149812bespoke_licenseNaN
...............
85885330cc_byNaN
85895330cc_byNaN
85926080Must link to published article ; Publisher cop...
85936080cc_byNaN
85946080cc_byNaN
-

1118 rows × 4 columns

-
# De-duplicate: one row per journal.
# Rows are ordered by journal, then by ascending embargo, so keeping the first
# occurrence of each journal retains its shortest embargo.
sherpa_policies_ir_id = sherpa_policies_ir.loc[:, ['journal', 'embargo']].sort_values(
    ['journal', 'embargo']
)
sherpa_policies_ir_dedup = sherpa_policies_ir_id.drop_duplicates(
    subset=['journal'], keep='first'
)
sherpa_policies_ir_dedup
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargo
236720
834230
736650
261612
708670
.........
64799960
68739970
18239980
39449990
675010000
-

579 rows × 2 columns

-
# Add the "green" category (code 2) to every de-duplicated journal row.
# The frame comes out of drop_duplicates(), so writing a column into it in
# place made pandas emit SettingWithCopyWarning. assign() returns a new frame
# with the extra column, which avoids the ambiguous write and gives the same
# resulting table.
sherpa_policies_ir_dedup = sherpa_policies_ir_dedup.assign(oa_status=2)
sherpa_policies_ir_dedup
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargooa_status
2367202
8342302
7366502
2616122
7086702
............
647999602
687399702
182399802
394499902
6750100002
-

579 rows × 3 columns

-
# Attach the Sherpa-derived embargo / oa_status to the journals table.
# The policy frame identifies journals through `journal`; rename it to `id` so
# it can serve as the join key. A left join keeps every journal, including the
# ones without a matching policy row (their new columns are NaN).
# Both frames carry an `oa_status` column, so the merge yields `oa_status_x`
# (journals) and `oa_status_y` (Sherpa).
sherpa_policies_ir_dedup = sherpa_policies_ir_dedup.rename(columns={'journal': 'id'})
journals_export = journals_export.merge(
    sherpa_policies_ir_dedup,
    how='left',
    on='id',
)
journals_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_status_xpublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreembargooa_status_y
01Revue médicale suisseRev. méd. suisse200599992151381100000NaNNaNNaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN02.0
23PLoS ONE20069999http://www.plosone.org/23612453111004.03571402.0
34EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaNNaNNaN
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN02.0
.........................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN02.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN02.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN02.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN02.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaNNaNNaN
-

911 rows × 18 columns

-
- - - - -```python -# choix de la catégorie OA -journals_export['oa_status'] = journals_export['oa_status_x'] -journals_export.loc[(journals_export['oa_status_x'] == 1) & (journals_export['oa_status_y'].notna()), 'oa_status'] = journals_export['oa_status_y'] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_status_xpublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreembargooa_status_yoa_status
01Revue médicale suisseRev. méd. suisse200599992151381100000NaNNaNNaN1.0
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN02.02.0
23PLoS ONE20069999http://www.plosone.org/23612453111004.03571402.05.0
34EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaNNaNNaN1.0
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN02.02.0
............................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN02.02.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN02.02.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN02.02.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN02.02.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaNNaNNaN1.0
-

911 rows × 19 columns

-
- - - - -```python -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals_export['oa_status'].value_counts() -``` - - - - - 2.0 518 - 1.0 306 - 5.0 70 - 6.0 17 - Name: oa_status, dtype: int64 - - - - -```python -del journals_export['embargo'] -del journals_export['oa_status_x'] -del journals_export['oa_status_y'] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999215138100000NaN1.0
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2.0
23PLoS ONE20069999http://www.plosone.org/2361243111004.0357145.0
34EU-topíasEU-topías20119999209124, 138, 402, 2924, 500000NaN1.0
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2.0
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1.0
-

911 rows × 16 columns

-
- - - - -```python -journals_export['oa_status'] = journals_export['oa_status'].astype(int) -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999215138100000NaN1
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2
23PLoS ONE20069999http://www.plosone.org/2361243111004.0357145
34EU-topíasEU-topías20119999209124, 138, 402, 2924, 500000NaN1
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1
-

911 rows × 16 columns

-
- - - - -```python -# export csv -journals_export.to_csv('sample/journal_fin_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -journals_export.to_excel('sample/journal_fin_sherpa.xlsx', index=False) -``` - - -```python -# export csv -sherpa_policies_ir_dedup.to_csv('sample/journal_ir.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -sherpa_policies_ir_dedup.to_excel('sample/journal_ir.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/06_oacct_sherpa.py b/import_scripts/06_oacct_sherpa.py deleted file mode 100644 index 158819fd..00000000 --- a/import_scripts/06_oacct_sherpa.py +++ /dev/null @@ -1,1107 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données de Sherpa/Romeo obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Données de Sherpa/Romeo -# -# ### Exemple -# -# https://v2.sherpa.ac.uk/cgi/retrieve_by_id?item-type=publication&api-key=EEE6F146-678E-11EB-9C3A-202F3DE2659A&format=Json&identifier=17601 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) - - -# ## Table publisher_sherpa - -# In[2]: - - -# creation du DF -col_names = ['journal', - 'publisher_id', - 'name', - 'country', - 'type', - 'url' - ] -publisher_sherpa = pd.DataFrame(columns = col_names) -publisher_sherpa - - -# ## Table sherpa match issn - -# In[3]: - - -# creation du DF -col_names = ['issn', - 'sherpa_match', - ] -sherpa_match_issn = pd.DataFrame(columns = col_names) -sherpa_match_issn - - -# ## Table sherpa issns - -# In[4]: - - -# creation du DF -col_names = ['issn', - 'type', - ] -sherpa_issn = pd.DataFrame(columns = col_names) -sherpa_issn - - -# ## Table sherpa journals - -# In[5]: - - -# creation du DF -col_names = ['journal', - 'title', - 'url', - ] -sherpa_journal = pd.DataFrame(columns = col_names) -sherpa_journal - - -# ## Import table Journals et ISSN - -# In[6]: - - -journal = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# In[7]: - - -issn = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', header=0, sep='\t') -issn - - -# In[8]: - - -issn_ids = pd.read_csv('sample/issn_ids.tsv', encoding='utf-8', header=0, sep='\t') -issn_ids - - -# ## Extraction de Sherpa Romeo - -# In[9]: - - -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # if (((index/10) - int(index/10)) == 0) : - # print(index) - # initialisation des variables à extraire - publisher_id = np.nan - publisher_name = '' - 
publisher_country = '' - publisher_type = '' - publisher_url = '' - # boucle des fichiers json - # test d'existance du fichier - # print(row['issn']) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - if (len(data['items']) > 0): - publisher_id = data['items'][0]['publishers'][0]['publisher']['id'] - if ('country' in data['items'][0]['publishers'][0]['publisher']): - publisher_country = data['items'][0]['publishers'][0]['publisher']['country'] - if ('relationship_type' in data['items'][0]['publishers'][0]): - publisher_type = data['items'][0]['publishers'][0]['relationship_type'] - if ('url' in data['items'][0]['publishers'][0]['publisher']): - publisher_url = data['items'][0]['publishers'][0]['publisher']['url'] - if ('name' in data['items'][0]['publishers'][0]['publisher']['name'][0]): - publisher_name = data['items'][0]['publishers'][0]['publisher']['name'][0]['name'] - sherpa_match = 'OK' - publisher_sherpa = publisher_sherpa.append({'journal' : journal_id, 'publisher_id' : publisher_id, - 'name' : publisher_name, 'country' : publisher_country, - 'type' : publisher_type, 'url' : publisher_url}, ignore_index=True) - else : - print(row['issn'] + ' - trouvé mais vide') - sherpa_match = 'empty' - else : - print(row['issn'] + ' - pas trouvé') - sherpa_match = 'missing' - sherpa_match_issn = sherpa_match_issn.append({'issn' : row['issn'], 'sherpa_match' : sherpa_match}, ignore_index=True) - - -# In[10]: - - -publisher_sherpa - - -# In[11]: - - -sherpa_match_issn - - -# In[12]: - - -# dedup -publisher_sherpa_dedup = publisher_sherpa.drop_duplicates() -publisher_sherpa_dedup - - -# In[13]: - - -sherpa_match_issn - - -# In[14]: - - -# ajout du issnl et du titre -sherpa_match_issn = pd.merge(sherpa_match_issn, issn_ids, on='issn', how='left') -sherpa_match_issn = pd.merge(sherpa_match_issn, journal[['issnl', 'title']], on='issnl', how='left') 
-sherpa_match_issn - - -# In[15]: - - -sherpa_match_results = sherpa_match_issn[['id', 'issnl', 'sherpa_match']].groupby(['issnl', 'sherpa_match']).count() -sherpa_match_results - - -# In[16]: - - -sherpa_match_results = sherpa_match_results.reset_index() -sherpa_match_results - - -# In[17]: - - -sherpa_match_results_ok = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'OK'] -issn_ids_issnl = issn_ids[['issnl', 'journal']].drop_duplicates(subset='issnl') -issn_ids_issnl = pd.merge(issn_ids_issnl, sherpa_match_results_ok, on='issnl', how='left') -issn_ids_issnl = pd.merge(issn_ids_issnl, journal[['issnl', 'title']], on='issnl', how='left') -issn_ids_issnl - - -# In[18]: - - -journals_not_sherpa = issn_ids_issnl.loc[issn_ids_issnl['sherpa_match'].isna()] -journals_not_sherpa - - -# In[19]: - - -sherpa_match_results_empty = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'empty'] -sherpa_match_results_missing = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'missing'] -del journals_not_sherpa['sherpa_match'] -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_empty, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_missing, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa - - -# In[20]: - - -# extraction des informations des journaux à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - title = np.nan - url = np.nan - if (len(data['items']) > 0): - if ('url' in data['items'][0]): - 
url = data['items'][0]['url'] - if ('title' in data['items'][0]['title'][0]): - title = data['items'][0]['title'][0]['title'] - sherpa_journal = sherpa_journal.append({'journal' : journal_id, 'title' : title, 'url' : url}, ignore_index=True) - - -# In[21]: - - -sherpa_journal - - -# In[22]: - - -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - myissn = np.nan - mytype = np.nan - data = json.load(f) - if (len(data['items']) > 0): - if ('issns' in data['items'][0]): - issns = data['items'][0]['issns'] - for i in issns: - if ('issn' in i): - myissn = i['issn'] - if ('type' in i): - mytype = i['type'] - sherpa_issn = sherpa_issn.append({'issn' : myissn, 'type' : mytype}, ignore_index=True) - - -# In[23]: - - -sherpa_issn - - -# In[24]: - - -# dedup -sherpa_issn = sherpa_issn.drop_duplicates() -sherpa_issn - - -# In[25]: - - -# completer le fichier des issns avec les types de sherpa -issn2 = pd.merge(issn, sherpa_issn, on='issn', how='left') -issn2 - - -# In[26]: - - -# exports csv -publisher_sherpa_dedup.to_csv('sample/publisher_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_match_issn.to_csv('sample/sherpa_match_issn.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_journal.to_csv('sample/sherpa_journal.tsv', sep='\t', encoding='utf-8', index=False) -issn2.to_csv('sample/issn_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -journals_not_sherpa.to_csv('sample/journals_not_sherpa.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[27]: - - -# exports excel -publisher_sherpa_dedup.to_excel('sample/publisher_sherpa.xlsx', index=False) 
-sherpa_match_issn.to_excel('sample/sherpa_match_issn.xlsx', index=False) -sherpa_journal.to_excel('sample/sherpa_journal.xlsx', index=False) -issn2.to_excel('sample/issn_sherpa.xlsx', index=False) -journals_not_sherpa.to_excel('sample/journals_not_sherpa.xlsx', index=False) - - -# In[28]: - - -# ajout des titres Sherpa a la table des revues -# renommer les colonnes -sherpa_journal = sherpa_journal.rename(columns={'journal' : 'id'}) -journal = pd.merge(journal, sherpa_journal, on='id', how='left') -journal - - -# In[29]: - - -# choix du titre et url -journal['url'] = journal['url_y'] -journal.loc[journal['url_y'].isna(), 'url'] = journal['url_x'] -journal['title'] = journal['title_y'] -journal.loc[journal['title_y'].isna(), 'title'] = journal['title_x'] -journal - - -# In[30]: - - -journals_export = journal[['id', 'title', 'name_short_iso_4', 'starting_year', 'end_year', 'url', 'country', 'language', 'oa_status', 'publisher', 'doaj_seal', 'doaj_status', 'lockss', 'portico', 'nlch', 'qoam_av_score']] -journals_export - - -# In[31]: - - -# renommage des champs finaux -journals_export = journals_export.rename(columns={'title' : 'name', 'url' : 'website'}) -# remplacement des vides et id à int -journals_export['starting_year'] = journals_export['starting_year'].fillna(0) -journals_export['end_year'] = journals_export['end_year'].fillna(9999) -journals_export['name_short_iso_4'] = journals_export['name_short_iso_4'].fillna('') -journals_export['website'] = journals_export['website'].fillna('') -journals_export['doaj_seal'] = journals_export['doaj_seal'].fillna('0') -journals_export['country'] = journals_export['country'].fillna('999999') -journals_export['language'] = journals_export['language'].fillna('999999') -journals_export['doaj_status'] = journals_export['doaj_status'].astype(int) -journals_export['doaj_seal'] = journals_export['doaj_seal'].astype(int) -journals_export['lockss'] = journals_export['lockss'].astype(int) -journals_export['portico'] = 
journals_export['portico'].astype(int) -journals_export['nlch'] = journals_export['nlch'].astype(int) -journals_export - - -# In[32]: - - -journals_export = journals_export.drop_duplicates(subset='id') -journals_export - - -# In[33]: - - -# test journaux sans titre -journals_export.loc[journals_export['name'].isna()] - - -# In[34]: - - -# export et suppression des journaux sans titre -# export csv -journals_export.loc[journals_export['name'].isna()].to_csv('sample/sherpa_journals_without_title.tsv', sep='\t', encoding='utf-8', index=False) -# export excel -journals_export.loc[journals_export['name'].isna()].to_excel('sample/sherpa_journals_without_title.xlsx', index=False) -journals_export = journals_export.loc[journals_export['name'].notna()] -journals_export - - -# In[35]: - - -journals_export.loc[journals_export['name'].str.contains('(Print)')] - - -# In[36]: - - -journals_export.loc[journals_export['name'].str.contains('(Online)')] - - -# In[37]: - - -# remplacement des mentions " (Print)" et " (Online)" dans les titres -journals_export['name'] = journals_export['name'].str.replace('(Print)', '') -journals_export['name'] = journals_export['name'].str.replace('(Online)', '') -journals_export - - -# In[38]: - - -journals_export.loc[journals_export['name'].str.contains('(Print)')] - - -# In[39]: - - -journals_export.loc[journals_export['name'].str.contains('(Online)')] - - -# ## Table sherpa_policies - -# In[40]: - - -# creation du DF -col_names = ['journal', - 'issn', - 'sherpa_id', - 'sherpa_uri', - 'open_access_prohibited', - 'additional_oa_fee', - 'article_version', - 'license', - 'embargo', - 'prerequisites', - 'prerequisite_funders', - 'prerequisite_funders_name', - 'prerequisite_funders_fundref', - 'prerequisite_funders_ror', - 'prerequisite_funders_country', - 'prerequisite_funders_url', - 'prerequisite_funders_sherpa_id', - 'prerequisite_subjects', - 'location', - 'locations_ir', - 'locations_not_ir', - 'named_repository', - 
'named_academic_social_network', - 'copyright_owner', - 'publisher_deposit', - 'archiving', - 'conditions', - 'public_notes' - ] -sherpa_policies = pd.DataFrame(columns = col_names) -sherpa_policies - - -# In[41]: - - -# dédoublonage par journal id -issn_dedup = issn.drop_duplicates(subset='journal') -issn_dedup - - -# In[42]: - - -# type de repositories qui provoquent archiving = 1 : -# tous les types : 'academic_social_network', 'any_repository', 'any_website', 'authors_homepage', -# 'funder_designated_location', 'institutional_repository', 'institutional_website', 'named_academic_social_network', -# 'named_repository', 'non_commercial_institutional_repository', 'non_commercial_repository', -# 'non_commercial_social_network', 'non_commercial_subject_repository', 'non_commercial_website', -# 'preprint_repository', 'subject_repository', 'this_journal' -repositories_archiving = ['any_repository', - 'institutional_repository', - 'institutional_website', - 'non_commercial_institutional_repository', - 'non_commercial_repository', - 'any_website', - 'non_commercial_website'] - -# extraction des termes -for index, row in issn_dedup.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - # test d'existance du fichier - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - # initialisation des variables à extraire - sherpa_id = np.nan - sherpa_uri = np.nan - open_access_prohibited = np.nan - location = np.nan - locations_ir = '' - locations_not_ir = '' - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - 
prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - copyright_owner = np.nan - publisher_deposit = np.nan - archiving = np.nan - conditions = np.nan - public_notes = np.nan - if (len(data['items']) > 0): - if ('id' in data['items'][0]): - sherpa_id = data['items'][0]['id'] - # test si l'id est déjà présent - if sherpa_id in sherpa_policies['sherpa_id'] : - print('SKIP ' + str(sherpa_id)) - else : - poilicies = data['items'][0]['publisher_policy'] - for poilicy in poilicies: - # initialisation des variables à extraire - sherpa_uri = np.nan - open_access_prohibited = np.nan - if ('uri' in poilicy): - sherpa_uri = poilicy['uri'] - if ('open_access_prohibited' in poilicy): - open_access_prohibited = poilicy['open_access_prohibited'] - if ('permitted_oa' in poilicy): - poas = poilicy['permitted_oa'] - for poa in poas: - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - locations_ir = '' - locations_not_ir = '' - copyright_owner = np.nan - conditions = np.nan - public_notes = np.nan - if ('additional_oa_fee' in poa): - additional_oa_fee = poa['additional_oa_fee'] - if ('location' in poa): - archiving = 0 - location = '' - mylocations = poa['location']['location'] - mylocations_text = poa['location']['location_phrases'] - if (type(mylocations) is not list): - mylocations = [mylocations] - location = ' ; '.join(mylocations) - for locationi in mylocations: - if 
locationi in repositories_archiving : - archiving = archiving + 1 - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_ir == '': - locations_ir = locations_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_ir : - locations_ir = locations_ir + ' ; ' + locationi_text['phrase'] - else : - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_not_ir == '': - locations_not_ir = locations_not_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_not_ir : - locations_not_ir = locations_not_ir + ' ; ' + locationi_text['phrase'] - # print (archiving) - if archiving > 0: - archiving = True - else : - archiving = False - if ('named_repository' in poa['location']): - if (type(poa['location']['named_repository']) is list): - named_repository = ' ; '.join(poa['location']['named_repository']) - else : - named_repository = poa['location']['named_repository'] - locations_not_ir = locations_not_ir.replace('Named Repository', named_repository) - locations_ir = locations_ir.replace('Named Repository', named_repository) - if ('named_academic_social_network' in poa['location']): - if (type(poa['location']['named_academic_social_network']) is list): - named_academic_social_network = ' ; '.join(poa['location']['named_academic_social_network']) - else : - named_academic_social_network = poa['location']['named_academic_social_network'] - locations_not_ir = locations_not_ir.replace('Named Academic Social Network', named_academic_social_network) - locations_ir = locations_ir.replace('Named Academic Social Network', named_academic_social_network) - if ('embargo' in poa): - # print(poa['embargo']) - embargo_amount = 0 - if ('amount' in poa['embargo']): - embargo_amount = poa['embargo']['amount'] - if ('units' in poa['embargo']): - if (poa['embargo']['units'] == 'months') : - embargo = embargo_amount - elif (poa['embargo']['units'] == 
'years') : - embargo = embargo_amount*12 - elif (poa['embargo']['units'] == 'weeks') : - embargo = int(embargo_amount/4) - if (embargo == 0): - embargo = 1 - elif (poa['embargo']['units'] == 'days') : - embargo = int(embargo_amount/30) - if (embargo == 0): - embargo = 1 - else : - embargo = embargo_amount - if ('prerequisites' in poa): - if 'prerequisites' in poa['prerequisites'] : - if (type(poa['prerequisites']['prerequisites']) is list): - prerequisites = ' ; '.join(poa['prerequisites']['prerequisites']) - else: - prerequisites = poa['prerequisites']['prerequisites'] - if ('prerequisite_funders' in poa['prerequisites']): - prerequisite_funders = True - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - # if (type(poa['prerequisites']['prerequisite_funders']) is list): - # prerequisite_funders = ' ; '.join(poa['prerequisites']['prerequisite_funders']) - # else: - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - if ('prerequisite_subjects' in poa['prerequisites']): - prerequisite_subjects = True - # prerequisite_subjects = poa['prerequisites']['prerequisite_subjects'] - # if (type(poa['prerequisite_subjects']) is list): - # prerequisite_subjects = ' ; '.join(poa['prerequisite_subjects']) - # else: - # prerequisite_subjects = poa['prerequisite_subjects'] - if ('copyright_owner' in poa): - copyright_owner = poa['copyright_owner'] - if ('publisher_deposit' in poa): - publisher_deposit = '' - if (type(poa['publisher_deposit']) is list): - for deposit in poa['publisher_deposit']: - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = 
publisher_deposit + ' ; ' - else : - deposit = poa['publisher_deposit'] - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - # print (publisher_deposit) - if ('conditions' in poa): - if (type(poa['conditions']) is list): - conditions = ' ; '.join(poa['conditions']) - else: - conditions = poa['conditions'] - if ('public_notes' in poa): - if (type(poa['public_notes']) is list): - public_notes = ' ; '.join(poa['public_notes']) - else: - public_notes = poa['public_notes'] - if ('license' in poa): - licenses = poa['license'] - if (type(licenses) is not list): - licenses = [licenses] - else : - licenses = [''] - # avec article version - if ('article_version' in poa): - article_versions = poa['article_version'] - for article_version in article_versions: - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 
'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : 
open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 
'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - - # sans article version - else : - if (type(licenses) is not list): - licenses = [licenses] - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 
'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : 
locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans permitted_oa - else : - print ('permitted_oa MISSING') - else : - print ('id MISSING') - - -# In[43]: - - -sherpa_policies - - -# In[44]: - - -# convertir l'index en id -sherpa_policies = sherpa_policies.reset_index() -# ajout de l'id avec l'index + 1 -sherpa_policies['id'] = sherpa_policies['index'] + 1 -del sherpa_policies['index'] -sherpa_policies - - -# In[45]: - - -# export csv 
-sherpa_policies.to_csv('sample/sherpa_policies_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[46]: - - -# export excel -sherpa_policies.to_excel('sample/sherpa_policies_brut.xlsx', index=False) - - -# ## Calcul de la catégorie "green" et export final des journaux - -# In[47]: - - -sherpa_policies - - -# In[48]: - - -sherpa_policies_ir = sherpa_policies.loc[(sherpa_policies['archiving'] == True) & (sherpa_policies['article_version'] == 'published') & (sherpa_policies['prerequisite_funders'].isna())][['journal', 'embargo', 'license', 'conditions']] -sherpa_policies_ir - - -# In[49]: - - -# dedup -sherpa_policies_ir_id = sherpa_policies_ir[['journal', 'embargo']].sort_values(by=['journal', 'embargo']) -sherpa_policies_ir_dedup = sherpa_policies_ir_id.drop_duplicates(subset='journal') -sherpa_policies_ir_dedup - - -# In[50]: - - -# ajout de la ctégorie green (2) -sherpa_policies_ir_dedup['oa_status'] = 2 -sherpa_policies_ir_dedup - - -# In[51]: - - -# merge avec les revues -sherpa_policies_ir_dedup = sherpa_policies_ir_dedup.rename(columns={'journal' : 'id'}) -journals_export = pd.merge(journals_export, sherpa_policies_ir_dedup, on='id', how='left') -journals_export - - -# In[52]: - - -# choix de la catégorie OA -journals_export['oa_status'] = journals_export['oa_status_x'] -journals_export.loc[(journals_export['oa_status_x'] == 1) & (journals_export['oa_status_y'].notna()), 'oa_status'] = journals_export['oa_status_y'] -journals_export - - -# In[53]: - - -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals_export['oa_status'].value_counts() - - -# In[54]: - - -del journals_export['embargo'] -del journals_export['oa_status_x'] -del journals_export['oa_status_y'] -journals_export - - -# In[55]: - - -journals_export['oa_status'] = journals_export['oa_status'].astype(int) -journals_export - - -# In[56]: - - -# export csv -journals_export.to_csv('sample/journal_fin_sherpa.tsv', sep='\t', encoding='utf-8', index=False) - 
- -# In[57]: - - -# export excel -journals_export.to_excel('sample/journal_fin_sherpa.xlsx', index=False) - - -# In[58]: - - -# export csv -sherpa_policies_ir_dedup.to_csv('sample/journal_ir.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[59]: - - -# export excel -sherpa_policies_ir_dedup.to_excel('sample/journal_ir.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/07_oacct_sherpa_publishers.md b/import_scripts/07_oacct_sherpa_publishers.md deleted file mode 100644 index 2a7ef957..00000000 --- a/import_scripts/07_oacct_sherpa_publishers.md +++ /dev/null @@ -1,4401 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table Journals Publishers : ajout des informations de Sherpa - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - - -```python -publishers_issn = pd.read_csv('sample/publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -publishers_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
380999999UNKNOWN999999
-

381 rows × 3 columns

-
- - - - -```python -# import ids -publisher_ids = pd.read_csv('sample/journals_publishers_ids.tsv', encoding='utf-8', header=0, sep='\t') -publisher_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher
011
122
233
344
445
.........
94099747
94199875
9429998
9431000119
9441001217
-

945 rows × 2 columns

-
- - - - -```python -# renommage id -publisher_ids = publisher_ids.rename(columns = {'id': 'journal'}) -publisher_ids = publisher_ids.rename(columns = {'publisher': 'id'}) -``` - - -```python -# dédoublonage par publisher id -publisher_ids_dedup = publisher_ids.drop_duplicates(subset='id') -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalid
011
122
233
344
445
.........
929987376
930987377
932989378
934991379
937994380
-

380 rows × 2 columns

-
- - - - -```python -# merge avec journals -publisher = pd.merge(publishers_issn, publisher_ids_dedup, on='id', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameidjournal
0Revue_Médicale_SuisseRevue Médicale Suisse11.0
1American_Physical_SocietyAmerican Physical Society22.0
2Public_Library_of_SciencePublic Library of Science33.0
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0
...............
376Tipografia_La_CommercialeTipografia La Commerciale377987.0
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0
378Excerpta_MedicaExcerpta Medica379991.0
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0
380999999UNKNOWN999999NaN
-

381 rows × 4 columns

-
- - - - -```python -# ajout des valeurs de sherpa -publisher_sherpa = pd.read_csv('sample/publisher_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
2789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
31663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
48073291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
80387010American Physical Societyussociety_publisherhttp://www.aps.org/
8044110American Physical Societyussociety_publisherhttp://www.aps.org/
8058010American Physical Societyussociety_publisherhttp://www.aps.org/
80653310American Physical Societyussociety_publisherhttp://www.aps.org/
80760810American Physical Societyussociety_publisherhttp://www.aps.org/
-

808 rows × 6 columns

-
- - - - -```python -# renommage ids -publisher_sherpa = publisher_sherpa.rename(columns = {'publisher_id': 'publisher_id_sherpa', 'url': 'website_sherpa', 'country': 'iso_code'}) -``` - - -```python -# merge avec ids journals -publisher = pd.merge(publisher, publisher_sherpa, on='journal', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname_xidjournalpublisher_id_sherpaname_yiso_codetypewebsite_sherpa
0Revue_Médicale_SuisseRevue Médicale Suisse11.0NaNNaNNaNNaNNaN
1American_Physical_SocietyAmerican Physical Society22.010.0American Physical Societyussociety_publisherhttp://www.aps.org/
2Public_Library_of_SciencePublic Library of Science33.0112.0Public Library of Scienceuscommercial_publisherhttp://www.plos.org/
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaNNaN
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaNNaN
..............................
376Tipografia_La_CommercialeTipografia La Commerciale377987.03291.0Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaNNaN
378Excerpta_MedicaExcerpta Medica379991.030.0Elsevieruscommercial_publisherhttp://www.elsevier.com/
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaNNaN
380999999UNKNOWN999999NaNNaNNaNNaNNaNNaN
-

381 rows × 9 columns

-
- - - - -```python -# renommage names -publisher = publisher.rename(columns = {'name_x': 'name_issn', 'name_y': 'name_sherpa'}) -``` - - -```python -# ajout des informations à partir des revues -publisher_journals = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t', usecols=['id', 'url']) -publisher_journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idurl
01NaN
12http://prl.aps.org/
23http://www.plosone.org/
34NaN
45http://ojps.aip.org/prbo/
.........
906997NaN
907998http://www.jpedsurg.org
908999http://www.springerlink.com/content/100451
9091000NaN
9101001https://www.physiology.org/journal/jappl
-

911 rows × 2 columns

-
- - - - -```python -# renommage id -publisher_journals = publisher_journals.rename(columns = {'id': 'journal'}) -``` - - -```python -# merge avec ids journals -publisher = pd.merge(publisher, publisher_journals, on='journal', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname_issnidjournalpublisher_id_sherpaname_sherpaiso_codetypewebsite_sherpaurl
0Revue_Médicale_SuisseRevue Médicale Suisse11.0NaNNaNNaNNaNNaNNaN
1American_Physical_SocietyAmerican Physical Society22.010.0American Physical Societyussociety_publisherhttp://www.aps.org/http://prl.aps.org/
2Public_Library_of_SciencePublic Library of Science33.0112.0Public Library of Scienceuscommercial_publisherhttp://www.plos.org/http://www.plosone.org/
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaNNaNNaN
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaNNaNNaN
.................................
376Tipografia_La_CommercialeTipografia La Commerciale377987.03291.0Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...NaN
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaNNaNNaN
378Excerpta_MedicaExcerpta Medica379991.030.0Elsevieruscommercial_publisherhttp://www.elsevier.com/NaN
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaNNaNNaN
380999999UNKNOWN999999NaNNaNNaNNaNNaNNaNNaN
-

381 rows × 10 columns

-
- - - - -```python -# renommage names -del publisher['publisher_id'] -del publisher['publisher_id_sherpa'] -del publisher['type'] -publisher = publisher.rename(columns = {'url' : 'website_issn_journal'}) -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journal
0Revue Médicale Suisse11.0NaNNaNNaNNaN
1American Physical Society22.0American Physical Societyushttp://www.aps.org/http://prl.aps.org/
2Public Library of Science33.0Public Library of Scienceushttp://www.plos.org/http://www.plosone.org/
3The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaN
4Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaN
........................
376Tipografia La Commerciale377987.0Springergbhttps://www.springernature.com/gp/products/jou...NaN
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaN
378Excerpta Medica379991.0Elsevierushttp://www.elsevier.com/NaN
379Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaN
380UNKNOWN999999NaNNaNNaNNaNNaN
-

381 rows × 7 columns

-
- - - - -```python -# ajout des champs vides des vides et int -publisher['city'] = '' -publisher['state'] = '' -publisher['oa_policies'] = '' -publisher['starting_year'] = 0 -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_year
0Revue Médicale Suisse11.0NaNNaNNaNNaN0
1American Physical Society22.0American Physical Societyushttp://www.aps.org/http://prl.aps.org/0
2Public Library of Science33.0Public Library of Scienceushttp://www.plos.org/http://www.plosone.org/0
3The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaN0
4Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaN0
....................................
376Tipografia La Commerciale377987.0Springergbhttps://www.springernature.com/gp/products/jou...NaN0
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaN0
378Excerpta Medica379991.0Elsevierushttp://www.elsevier.com/NaN0
379Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaN0
380UNKNOWN999999NaNNaNNaNNaNNaN0
-

381 rows × 11 columns

-
- - - - -```python -# iso_code en majuscules -publisher['iso_code'] = publisher['iso_code'].str.upper() -# ajout de la valeur pour unknown -publisher['iso_code'] = publisher['iso_code'].fillna('__') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_year
0Revue Médicale Suisse11.0NaN__NaNNaN0
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0
....................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0
380UNKNOWN999999NaNNaN__NaNNaN0
-

381 rows × 11 columns

-
- - - - -```python -# merge avec countries -country = pd.read_csv('sample/country.tsv', usecols=('iso_code', 'id'), encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codeid
0AF1
1AL2
2DZ3
3AS4
4AD5
.........
246ZM247
247ZW248
248AX249
249OI250
250__999999
-

251 rows × 2 columns

-
- - - - -```python -country = country.rename(columns={'id': 'country'}) -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codecountry
0AF1
1AL2
2DZ3
3AS4
4AD5
.........
246ZM247
247ZW248
248AX249
249OI250
250__999999
-

251 rows × 2 columns

-
- - - - -```python -publisher = pd.merge(publisher, country, on='iso_code', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_yearcountry
0Revue Médicale Suisse11.0NaN__NaNNaN0999999
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0236
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0236
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0999999
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0999999
.......................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0234
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0999999
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0236
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0999999
380UNKNOWN999999NaNNaN__NaNNaN0999999
-

381 rows × 12 columns

-
- - - - -```python -# garder sherpa puis issn.org -publisher.loc[publisher['name_sherpa'].notna(), 'name'] = publisher['name_sherpa'] -publisher.loc[publisher['name_sherpa'].isna(), 'name'] = publisher['name_issn'] -publisher.loc[publisher['website_sherpa'].notna(), 'website'] = publisher['website_sherpa'] -publisher.loc[publisher['website_sherpa'].isna(), 'website'] = publisher['website_issn_journal'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_yearcountrynamewebsite
0Revue Médicale Suisse11.0NaN__NaNNaN0999999Revue Médicale SuisseNaN
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0236American Physical Societyhttp://www.aps.org/
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0236Public Library of Sciencehttp://www.plos.org/
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0999999The Global Studies Institute de l’Université d...NaN
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0999999Universitat de València, Departamento de Teorí...NaN
.............................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0234Springerhttps://www.springernature.com/gp/products/jou...
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0999999Red.: Prof. Dr. F. Cavalli, Istituto oncologic...NaN
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0236Elsevierhttp://www.elsevier.com/
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0999999Generative Grammar Group of the Department of ...NaN
380UNKNOWN999999NaNNaN__NaNNaN0999999UNKNOWNNaN
-

381 rows × 14 columns

-
- - - - -```python -# garder les champs utiles pour l'éditeur -publisher_export = publisher[['id', 'name', 'country', 'city', 'state', 'starting_year', 'website', 'oa_policies']] -``` - - -```python -# supprimer les doublons -publisher_export = publisher_export.drop_duplicates(subset='id') -publisher_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990NaN
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990NaN
45Universitat de València, Departamento de Teorí...9999990NaN
...........................
376377Springer2340https://www.springernature.com/gp/products/jou...
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990NaN
378379Elsevier2360http://www.elsevier.com/
379380Generative Grammar Group of the Department of ...9999990NaN
380999999UNKNOWN9999990NaN
-

381 rows × 8 columns

-
- - - - -```python -# remplacement des vides et id à int -publisher_export['website'] = publisher_export['website'].fillna('') -publisher_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990
45Universitat de València, Departamento de Teorí...9999990
...........................
376377Springer2340https://www.springernature.com/gp/products/jou...
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990
378379Elsevier2360http://www.elsevier.com/
379380Generative Grammar Group of the Department of ...9999990
380999999UNKNOWN9999990
-

381 rows × 8 columns

-
- - - - -```python -# merge pour avoir les titres -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export[['id', 'name']], on='id', how='left') -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalidname
011Revue Médicale Suisse
122American Physical Society
233Public Library of Science
344The Global Studies Institute de l’Université d...
445Universitat de València, Departamento de Teorí...
............
375987376Springer
376987377Springer
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
378991379Elsevier
379994380Generative Grammar Group of the Department of ...
-

380 rows × 3 columns

-
- - - - -```python -# garder les ids avant le dédoublonnage pour la correction du publisher_ids_dedup -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher_av_dedup'}) -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_av_dedupname
011Revue Médicale Suisse
122American Physical Society
233Public Library of Science
344The Global Studies Institute de l’Université d...
445Universitat de València, Departamento de Teorí...
............
375987376Springer
376987377Springer
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
378991379Elsevier
379994380Generative Grammar Group of the Department of ...
-

380 rows × 3 columns

-
- - - - -```python -publisher_export_dedup = publisher_export.drop_duplicates(subset='name') -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990
45Universitat de València, Departamento de Teorí...9999990
...........................
371372[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx
374375Société botanique de Genève9999990
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990
379380Generative Grammar Group of the Department of ...9999990
380999999UNKNOWN9999990
-

196 rows × 8 columns

-
- - - - -```python -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse99999901
1American Physical Society2360http://www.aps.org/2
2Public Library of Science2360http://www.plos.org/3
3The Global Studies Institute de l’Université d...99999904
4Universitat de València, Departamento de Teorí...99999905
...........................
191[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx372
192Société botanique de Genève9999990375
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990378
194Generative Grammar Group of the Department of ...9999990380
195UNKNOWN9999990381
-

196 rows × 8 columns

-
- - - - -```python -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse99999901
1American Physical Society2360http://www.aps.org/2
2Public Library of Science2360http://www.plos.org/3
3The Global Studies Institute de l’Université d...99999904
4Universitat de València, Departamento de Teorí...99999905
...........................
191[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx192
192Société botanique de Genève9999990193
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990194
194Generative Grammar Group of the Department of ...9999990195
195UNKNOWN9999990196
-

196 rows × 8 columns

-
- - - - -```python -# merge avec les ids d'avant Sherpa -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export_dedup[['id', 'name']], on='name', how='left') -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher'}) -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'journal': 'id'}) -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher_av_dedupnamepublisher
011Revue Médicale Suisse1
122American Physical Society2
233Public Library of Science3
344The Global Studies Institute de l’Université d...4
445Universitat de València, Departamento de Teorí...5
...............
375987376Springer45
376987377Springer45
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...194
378991379Elsevier11
379994380Generative Grammar Group of the Department of ...195
-

380 rows × 4 columns

-
- - - - -```python -# concat valeurs avec même id -del publisher_ids_dedup['publisher_av_dedup'] -del publisher_ids_dedup['name'] -publisher_ids_dedup['publisher'] = publisher_ids_dedup['publisher'].astype(str) -publisher_ids_dedup_grouped = publisher_ids_dedup.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -publisher_ids_dedup_grouped -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher
id
11
22
33
44, 5
52
......
986193
98745, 45
989194
99111
994195
-

366 rows × 1 columns

-
- - - - -```python -# modifs dans les journaux -journal = pd.read_csv('sample/journal_fin_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999NaN215138100000NaN1
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2
23PLoS ONENaN20069999http://www.plosone.org/2361243111004.0357145
34EU-topíasEU-topías20119999NaN209124, 138, 402, 2924, 500000NaN1
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1
-

911 rows × 16 columns

-
- - - - -```python -# merge avec les journaux journal_fin_sherpa -journal = pd.merge(journal, publisher_ids_dedup_grouped, on='id', how='left') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisher_xdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_statuspublisher_y
01Revue médicale suisseRev. méd. suisse20059999NaN215138100000NaN11
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN22
23PLoS ONENaN20069999http://www.plosone.org/2361243111004.03571453
34EU-topíasEU-topías20119999NaN209124, 138, 402, 2924, 500000NaN14, 5
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN22
......................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2NaN
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2NaN
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2NaN
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2NaN
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1NaN
-

911 rows × 17 columns

-
- - - - -```python -del journal['publisher_x'] -journal = journal.rename(columns = {'publisher_y': 'publisher'}) -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagedoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_statuspublisher
01Revue médicale suisseRev. méd. suisse20059999NaN21513800000NaN11
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612400010NaN22
23PLoS ONENaN20069999http://www.plosone.org/236124111004.03571453
34EU-topíasEU-topías20119999NaN209124, 138, 402, 29200000NaN14, 5
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/23612400010NaN22
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412400010NaN2NaN
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612400010NaN2NaN
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312400111NaN2NaN
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412400010NaN2NaN
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612400000NaN1NaN
-

911 rows × 16 columns

-
- - - - -```python -# export JSON journal -result = journal.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -journal.to_csv('sample/journal.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -journal.to_excel('sample/journal.xlsx', index=False) -``` - - -```python -# export JSON publisher -result = publisher_export_dedup.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/publisher.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -publisher_export_dedup.to_csv('sample/publisher.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -publisher_export_dedup.to_excel('sample/publisher.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/07_oacct_sherpa_publishers.py b/import_scripts/07_oacct_sherpa_publishers.py deleted file mode 100644 index 29af8f7e..00000000 --- a/import_scripts/07_oacct_sherpa_publishers.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisies parmi les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table Journals Publishers : ajout des informations de Sherpa - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# In[2]: - - -publishers_issn = pd.read_csv('sample/publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -publishers_issn - - -# In[3]: - - -# import ids -publisher_ids = pd.read_csv('sample/journals_publishers_ids.tsv', encoding='utf-8', header=0, sep='\t') -publisher_ids - - -# In[4]: - - -# renommage id -publisher_ids = publisher_ids.rename(columns = {'id': 'journal'}) -publisher_ids = publisher_ids.rename(columns = {'publisher': 'id'}) - - -# In[5]: - - -# dédoublonage par publisher id -publisher_ids_dedup = publisher_ids.drop_duplicates(subset='id') -publisher_ids_dedup - - -# In[6]: - - -# merge avec journals -publisher = pd.merge(publishers_issn, publisher_ids_dedup, on='id', how='left') -publisher - - -# In[7]: - - -# ajout des valeurs de sherpa -publisher_sherpa = pd.read_csv('sample/publisher_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -publisher_sherpa - - -# In[8]: - - -# renommage ids -publisher_sherpa = publisher_sherpa.rename(columns = {'publisher_id': 'publisher_id_sherpa', 'url': 'website_sherpa', 'country': 'iso_code'}) - - -# In[9]: - - -# merge avec ids journals -publisher = pd.merge(publisher, publisher_sherpa, on='journal', how='left') -publisher - - -# In[10]: - - -# renommage names -publisher = publisher.rename(columns = {'name_x': 'name_issn', 'name_y': 'name_sherpa'}) - - -# In[11]: - - -# ajout des informations à partir des revues -publisher_journals = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t', usecols=['id', 'url']) -publisher_journals - - -# In[12]: - - -# renommage id -publisher_journals = publisher_journals.rename(columns = {'id': 'journal'}) - - -# In[13]: - - -# merge avec ids journals -publisher = 
pd.merge(publisher, publisher_journals, on='journal', how='left') -publisher - - -# In[14]: - - -# renommage names -del publisher['publisher_id'] -del publisher['publisher_id_sherpa'] -del publisher['type'] -publisher = publisher.rename(columns = {'url' : 'website_issn_journal'}) -publisher - - -# In[15]: - - -# ajout des champs vides des vides et int -publisher['city'] = '' -publisher['state'] = '' -publisher['oa_policies'] = '' -publisher['starting_year'] = 0 -publisher - - -# In[16]: - - -# iso_code en majuscules -publisher['iso_code'] = publisher['iso_code'].str.upper() -# ajout de la valeur pour unknown -publisher['iso_code'] = publisher['iso_code'].fillna('__') -publisher - - -# In[17]: - - -# merge avec countries -country = pd.read_csv('sample/country.tsv', usecols=('iso_code', 'id'), encoding='utf-8', header=0, sep='\t') -country - - -# In[18]: - - -country = country.rename(columns={'id': 'country'}) -country - - -# In[19]: - - -publisher = pd.merge(publisher, country, on='iso_code', how='left') -publisher - - -# In[20]: - - -# garder sherpa puis issn.org -publisher.loc[publisher['name_sherpa'].notna(), 'name'] = publisher['name_sherpa'] -publisher.loc[publisher['name_sherpa'].isna(), 'name'] = publisher['name_issn'] -publisher.loc[publisher['website_sherpa'].notna(), 'website'] = publisher['website_sherpa'] -publisher.loc[publisher['website_sherpa'].isna(), 'website'] = publisher['website_issn_journal'] -publisher - - -# In[21]: - - -# garder les champs utiles pour l'éditeur -publisher_export = publisher[['id', 'name', 'country', 'city', 'state', 'starting_year', 'website', 'oa_policies']] - - -# In[22]: - - -# supprimer les doublons -publisher_export = publisher_export.drop_duplicates(subset='id') -publisher_export - - -# In[23]: - - -# remplacement des vides et id à int -publisher_export['website'] = publisher_export['website'].fillna('') -publisher_export - - -# In[24]: - - -# merge pour avoir les titres -publisher_ids_dedup = 
pd.merge(publisher_ids_dedup, publisher_export[['id', 'name']], on='id', how='left') -publisher_ids_dedup - - -# In[25]: - - -# garder les ids avant le dédoublonage pour la correction du publisher_ids_dedup -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher_av_dedup'}) -publisher_ids_dedup - - -# In[26]: - - -publisher_export_dedup = publisher_export.drop_duplicates(subset='name') -publisher_export_dedup - - -# In[27]: - - -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup - - -# In[28]: - - -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup - - -# In[29]: - - -# merge avec les ids d'avant Sherpa -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export_dedup[['id', 'name']], on='name', how='left') -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher'}) -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'journal': 'id'}) -publisher_ids_dedup - - -# In[30]: - - -# concat valeurs avec même id -del publisher_ids_dedup['publisher_av_dedup'] -del publisher_ids_dedup['name'] -publisher_ids_dedup['publisher'] = publisher_ids_dedup['publisher'].astype(str) -publisher_ids_dedup_grouped = publisher_ids_dedup.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -publisher_ids_dedup_grouped - - -# In[31]: - - -# modifs dans les journaux -journal = pd.read_csv('sample/journal_fin_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# In[32]: - - -# merge avec les journaux journal_fin_sherpa -journal = pd.merge(journal, 
publisher_ids_dedup_grouped, on='id', how='left') -journal - - -# In[33]: - - -del journal['publisher_x'] -journal = journal.rename(columns = {'publisher_y': 'publisher'}) -journal - - -# In[34]: - - -# esport JSON publisher -result = journal.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[35]: - - -# export csv -journal.to_csv('sample/journal.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[36]: - - -# export excel -journal.to_excel('sample/journal.xlsx', index=False) - - -# In[37]: - - -# esport JSON publisher -result = publisher_export_dedup.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/publisher.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[38]: - - -# export csv -publisher_export_dedup.to_csv('sample/publisher.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[39]: - - -# export excel -publisher_export_dedup.to_excel('sample/publisher.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/08_oacct_sherpa_issns.md b/import_scripts/08_oacct_sherpa_issns.md deleted file mode 100644 index 8989dea2..00000000 --- a/import_scripts/08_oacct_sherpa_issns.md +++ /dev/null @@ -1,2204 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table ISSNs - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - - -```python -issns = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - -## Ajout du format à partir de Sherpa - - -```python -# ajout du format par sherpa -issn_sherpa = pd.read_csv('sample/issn_sherpa.tsv', encoding='utf-8', sep='\t') -issn_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11print
11399-00390001-2815532NaN32electronic
20001-48420001-4842498PRINT13print
31520-48980001-4842498NaN34electronic
40001-49660001-4966789PRINT15print
........................
17552470-00452470-0045533OTHER31756print
17562470-00532470-0045533NaN31757electronic
17572475-99532475-9953608ELECTRONIC21758electronic
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issn_sherpa['type'] = issn_sherpa['type'].str.upper() -issn_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN32ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN34ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns = pd.merge(issns, issn_sherpa[['issn', 'type']], on='issn', how='outer') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN32ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN34ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns['format'].value_counts() -``` - - - - - PRINT 816 - ELECTRONIC 90 - OTHER 2 - Name: format, dtype: int64 - - - - -```python -issns['type'].value_counts() -``` - - - - - PRINT 750 - ELECTRONIC 575 - Name: type, dtype: int64 - - - - -```python -# tester les lignes sans type -issns.loc[issns['format'].isnull()].loc[issns['type'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
51520-85240001-4966789NaN36NaN
61520-90240001-4966789NaN37NaN
171943-29840002-78638NaN318NaN
231555-71620002-9343985NaN324NaN
272163-57730002-9513787NaN328NaN
........................
17222160-90472160-9020467NaN31723NaN
17292340-115X2174-84544NaN31730NaN
17322211-32822211-2855990NaN31733NaN
17392297-70072297-6981618NaN31740NaN
17592504-44352504-4427994NaN31760NaN
-

326 rows × 7 columns

-
- - - - -```python -# tester les lignes avec type égal -issns.loc[issns['format'] == issns['type']] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
20001-48420001-4842498PRINT13PRINT
40001-49660001-4966789PRINT15PRINT
70001-62680001-6268166PRINT18PRINT
90001-63220001-6322807PRINT110PRINT
........................
17482380-81952380-8195947ELECTRONIC21749ELECTRONIC
17492469-990X2469-990X684ELECTRONIC21750ELECTRONIC
17512469-99502469-995041PRINT11752PRINT
17532470-00102470-001080PRINT11754PRINT
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
-

774 rows × 7 columns

-
- - - - -```python -# tester les lignes avec type diff -issns.loc[issns['format'] != issns['type']] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
11399-00390001-2815532NaN32ELECTRONIC
31520-48980001-4842498NaN34ELECTRONIC
51520-85240001-4966789NaN36NaN
61520-90240001-4966789NaN37NaN
80942-09400001-6268166NaN39ELECTRONIC
........................
17542470-00292470-001080NaN31755ELECTRONIC
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

986 rows × 7 columns

-
- - - - -```python -# attribution de l'id du type avec préférence par ISSN.org puis Sherpa -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns['issn_type'] = issns['format'] -issns.loc[issns['format'].isna(), 'issn_type'] = issns['type'] -issns['issn_type'] = issns['issn_type'].str.replace('PRINT', '1') -issns['issn_type'] = issns['issn_type'].str.replace('ELECTRONIC', '2') -issns['issn_type'] = issns['issn_type'].str.replace('OTHER', '3') -issns['issn_type'] = issns['issn_type'].fillna(3) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN22ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN24ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN21757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -# test de diffs -issns.loc[issns['format'] == 'PRINT'].loc[issns['type'] == 'ELECTRONIC'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
11230959-81380959-8138383PRINT11124ELECTRONIC
11911025-496X1025-496X779PRINT11192ELECTRONIC
14511465-69061465-6906773PRINT11452ELECTRONIC
-
- - - - -```python -# test de diffs -issns.loc[issns['format'] == 'ELECTRONIC'].loc[issns['type'] == 'PRINT'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
1210009-73300009-7330948ELECTRONIC2122PRINT
3600024-37950024-3795968ELECTRONIC2361PRINT
5950163-38640163-3864701ELECTRONIC2596PRINT
6530194-911X0194-911X871ELECTRONIC2654PRINT
6650197-93370197-9337672ELECTRONIC2666PRINT
7110270-64740270-647473ELECTRONIC2712PRINT
7340278-23910278-2391521ELECTRONIC2735PRINT
9280743-74630743-7463114ELECTRONIC2929PRINT
12051040-46511040-4651886ELECTRONIC21206PRINT
12431059-77941059-7794440ELECTRONIC21244PRINT
12871079-56421079-5642468ELECTRONIC21288PRINT
15031528-35421528-3542547ELECTRONIC21504PRINT
15131530-69841530-698436ELECTRONIC21514PRINT
15151534-43201534-4320735ELECTRONIC21516PRINT
15381549-96181549-9618158ELECTRONIC21539PRINT
15461553-734X1553-734X240ELECTRONIC21547PRINT
16611876-61021876-6102249ELECTRONIC21662PRINT
16621877-05681877-0568675ELECTRONIC21663PRINT
16631877-70581877-7058632ELECTRONIC21664PRINT
17302211-12472211-1247113ELECTRONIC21731PRINT
-
- - - - -```python -# test de diffs -issns.loc[issns['format'].isna()].loc[issns['type'] == 'PRINT'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
310003-26700003-2670415NaN132PRINT
1270010-36160010-3616417NaN1128PRINT
1510012-94020012-9402237NaN1152PRINT
2160018-93750018-9375361NaN1217PRINT
3760026-45980026-4598496NaN1377PRINT
6430178-80510178-8051999NaN1644PRINT
8381388-61500368-4466499NaN1839PRINT
11921560-79171025-496X779NaN11193PRINT
12011126-67081029-84797NaN11202PRINT
12491063-651X1063-651X588NaN11250PRINT
15311538-79331538-7836148NaN11532PRINT
15601569-92931569-9285822NaN11561PRINT
15971662-45481662-453X421NaN11598PRINT
16588756-32821873-2763488NaN11659PRINT
-
- - - - -```python -# convertir journal en int -issns['journal'] = issns['journal'].astype(int) -``` - - -```python -# convertir l'index en id -issns = issns.reset_index() -issns['id'] = issns['index'] + 1 -del issns['index'] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN22ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN24ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN21757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns['issn_type'] = issns['issn_type'].astype(int) -``` - - -```python -issns_export = issns[['id', 'issn', 'journal', 'issn_type']] -issns_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# supprimer les doublons par ISSN -issns_export = issns_export.drop_duplicates(subset='issn') -issns_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# esport JSON -result = issns_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/issn.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -issns_export.to_csv('sample/issn.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -issns_export.to_excel('sample/issn.xlsx', index=False) -``` diff --git a/import_scripts/08_oacct_sherpa_issns.py b/import_scripts/08_oacct_sherpa_issns.py deleted file mode 100644 index b48bac00..00000000 --- a/import_scripts/08_oacct_sherpa_issns.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table ISSNs - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# In[2]: - - -issns = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', sep='\t') -issns - - -# ## Ajout du format à partir de Sherpa - -# In[3]: - - -# ajout du format par sherpa -issn_sherpa = pd.read_csv('sample/issn_sherpa.tsv', encoding='utf-8', sep='\t') -issn_sherpa - - -# In[4]: - - -issn_sherpa['type'] = issn_sherpa['type'].str.upper() -issn_sherpa - - -# In[5]: - - -issns = pd.merge(issns, issn_sherpa[['issn', 'type']], on='issn', how='outer') -issns - - -# In[6]: - - -issns['format'].value_counts() - - -# In[7]: - - -issns['type'].value_counts() - - -# In[8]: - - -# tester les lignes sans type -issns.loc[issns['format'].isnull()].loc[issns['type'].isnull()] - - -# In[9]: - - -# tester les lignes avec type égal -issns.loc[issns['format'] == issns['type']] - - -# In[10]: - - -# tester les lignes avec type diff -issns.loc[issns['format'] != issns['type']] - - -# In[11]: - - -# attribution de l'id du type avec préference par ISSN.org puis Sherpa -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns['issn_type'] = issns['format'] -issns.loc[issns['format'].isna(), 'issn_type'] = issns['type'] -issns['issn_type'] = issns['issn_type'].str.replace('PRINT', '1') -issns['issn_type'] = issns['issn_type'].str.replace('ELECTRONIC', '2') -issns['issn_type'] = issns['issn_type'].str.replace('OTHER', '3') -issns['issn_type'] = issns['issn_type'].fillna(3) -issns - - -# In[12]: - - -# test de diffs -issns.loc[issns['format'] == 'PRINT'].loc[issns['type'] == 'ELECTRONIC'] - - -# In[13]: - - -# test de diffs -issns.loc[issns['format'] == 'ELECTRONIC'].loc[issns['type'] == 'PRINT'] - - -# In[14]: - - -# test de diffs -issns.loc[issns['format'].isna()].loc[issns['type'] == 'PRINT'] - - -# In[15]: - - -# convertir journal en int -issns['journal'] 
= issns['journal'].astype(int) - - -# In[16]: - - -# convertir l'index en id -issns = issns.reset_index() -issns['id'] = issns['index'] + 1 -del issns['index'] -issns - - -# In[17]: - - -issns['issn_type'] = issns['issn_type'].astype(int) - - -# In[18]: - - -issns_export = issns[['id', 'issn', 'journal', 'issn_type']] -issns_export - - -# In[19]: - - -# supprimer les doublons par ISSN -issns_export = issns_export.drop_duplicates(subset='issn') -issns_export - - -# In[20]: - - -# esport JSON -result = issns_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/issn.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[21]: - - -# export csv -issns_export.to_csv('sample/issn.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[22]: - - -# export excel -issns_export.to_excel('sample/issn.xlsx', index=False) - diff --git a/import_scripts/09_oacct_read_and_publish.md b/import_scripts/09_oacct_read_and_publish.md deleted file mode 100644 index df115477..00000000 --- a/import_scripts/09_oacct_read_and_publish.md +++ /dev/null @@ -1,9540 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 08.09.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 -``` - -## Ajout des rabais pour les revues des licences Read & Publish - -Journals list by publisher : - * https://consortium.ch/elsevier_titlelist_publication - * https://consortium.ch/springer_titlelist_publication - * https://consortium.ch/wiley_titlelist_publish - * https://consortium.ch/tandf_titlelist_publish - * https://consortium.ch/sage_titlelist_publish - * https://consortium.ch/cup_titlelist_publish - -Licence term : - * Elsevier : 2020-2023 - * Springer Nature : 2020-2022 - * Wiley : 2021-2024 - * Taylor & Francis : 2021-2023 - * Cambridge University Press (CUP) : 2021-2023 - -CC licences : - * Elsevier : CC-BY, CC-BY-NC-ND - * Springer Nature : CC-BY, CC-BY-NC - * Wiley : CC-BY, CC-BY-NC, CC-BY-NC-ND - * Taylor & Francis : CC-BY - * Cambridge University Press (CUP) : CC-BY, CC-BY-NC, CC-BY-NC-ND, CC-BY-NC-SA - -Special conditions : - * Cambridge University Press (CUP) : Only the following article types are covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports - - - -## Import du fichier des issns - - -```python -issn = pd.read_csv('sample/issn.tsv', encoding='utf-8', header=0, sep='\t') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# open publishers -publisher = pd.read_csv('sample/publisher.tsv', encoding='utf-8', header=0, sep='\t') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse999999NaNNaN0NaNNaN1
1American Physical Society236NaNNaN0http://www.aps.org/NaN2
2Public Library of Science236NaNNaN0http://www.plos.org/NaN3
3The Global Studies Institute de l’Université d...999999NaNNaN0NaNNaN4
4Universitat de València, Departamento de Teorí...999999NaNNaN0NaNNaN5
...........................
191[American Medical Association]999999NaNNaN0http://archneur.jamanetwork.com/issues.aspxNaN192
192Société botanique de Genève999999NaNNaN0NaNNaN193
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...999999NaNNaN0NaNNaN194
194Generative Grammar Group of the Department of ...999999NaNNaN0NaNNaN195
195UNKNOWN999999NaNNaN0NaNNaN196
-

196 rows × 8 columns

-
- - - - -```python -publisher.loc[publisher['name'] == 'Elsevier'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
10Elsevier236NaNNaN0http://www.elsevier.com/NaN11
-
- - - - -```python -publisher.loc[(publisher['name'] == 'Springer Verlag') | (publisher['name'] == 'Nature Research')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
8Nature Research234NaNNaN0http://www.nature.com/NaN9
28Springer Verlag83NaNNaN0http://www.springerlink.com/?MUD=MPNaN29
-
- - - - -```python -publisher.loc[publisher['name'] == 'Wiley'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
11Wiley236NaNNaN0https://www.wiley.com/en-gbNaN12
-
- - - - -```python -publisher.loc[publisher['name'] == 'Taylor and Francis'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
23Taylor and Francis234NaNNaN0http://www.tandf.co.uk/journals/default.aspNaN24
-
- - - - -```python -publisher.loc[publisher['name'] == 'Cambridge University Press'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
60Cambridge University Press234NaNNaN0http://www.cambridge.org/uk/NaN61
-
- - - - -```python -# ouvrir la liste d'organisations -participants = pd.read_csv('agreements/consortium_institutions_participation_read_and_publish.csv', encoding='utf-8', header=0, sep='\t') -participants -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyROR
0Agroscopexxxhttps://ror.org/04d8ztx87
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322
2CERNNaNxxhttps://ror.org/01ggx4157
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48
4EPF Lausannexxxhttps://ror.org/02s376052
5ETH Zürichxxxhttps://ror.org/05a28rw58
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751
13HEP Vaudxxxhttps://ror.org/01bvm0h13
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06
17Kalaidosxxxhttps://ror.org/049c2kr37
18Lib4RIxxxhttps://ror.org/021f7p178
19MediNaNxNaNNaN
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221
23PH Bernxxxhttps://ror.org/05jf1ma54
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97
25PH Luzernxxxhttps://ror.org/0235ynq74
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10
27PH Schwyzxxxhttps://ror.org/00rqdn375
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06
31PH Zugxxxhttps://ror.org/05ghhx264
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17
35Universität Baselxxxhttps://ror.org/02s6k3f65
36Universität Bernxxxhttps://ror.org/02k7v4d05
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392
38Universität Luzernxxxhttps://ror.org/00kgrkn83
39Universität St. Gallenxxxhttps://ror.org/0561a3s31
40Universität Zürichxxxhttps://ror.org/02crff812
41Université de Fribourgxxxhttps://ror.org/022fs9h90
42Université de Genèvexxxhttps://ror.org/01swzsf04
43Université de Lausannexxxhttps://ror.org/019whta54
44Université de Neuchâtelxxxhttps://ror.org/00vasag41
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27
-
- - - - -```python -# suppression de Lib4RI qui est une bibliothèque -participants = participants.loc[participants['Institution'] != 'Lib4RI'] -participants -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyROR
0Agroscopexxxhttps://ror.org/04d8ztx87
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322
2CERNNaNxxhttps://ror.org/01ggx4157
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48
4EPF Lausannexxxhttps://ror.org/02s376052
5ETH Zürichxxxhttps://ror.org/05a28rw58
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751
13HEP Vaudxxxhttps://ror.org/01bvm0h13
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06
17Kalaidosxxxhttps://ror.org/049c2kr37
19MediNaNxNaNNaN
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221
23PH Bernxxxhttps://ror.org/05jf1ma54
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97
25PH Luzernxxxhttps://ror.org/0235ynq74
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10
27PH Schwyzxxxhttps://ror.org/00rqdn375
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06
31PH Zugxxxhttps://ror.org/05ghhx264
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17
35Universität Baselxxxhttps://ror.org/02s6k3f65
36Universität Bernxxxhttps://ror.org/02k7v4d05
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392
38Universität Luzernxxxhttps://ror.org/00kgrkn83
39Universität St. Gallenxxxhttps://ror.org/0561a3s31
40Universität Zürichxxxhttps://ror.org/02crff812
41Université de Fribourgxxxhttps://ror.org/022fs9h90
42Université de Genèvexxxhttps://ror.org/01swzsf04
43Université de Lausannexxxhttps://ror.org/019whta54
44Université de Neuchâtelxxxhttps://ror.org/00vasag41
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27
-
- - - - -```python -# ajout de TF et CUP pour tous (TODO : obtenir la liste des bibliothèques pour ces deux licences) -participants['TF'] = 'x' -participants['CUP'] = 'x' -participants -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - This is separate from the ipykernel package so we can avoid doing imports until - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyRORTFCUP
0Agroscopexxxhttps://ror.org/04d8ztx87xx
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322xx
2CERNNaNxxhttps://ror.org/01ggx4157xx
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48xx
4EPF Lausannexxxhttps://ror.org/02s376052xx
5ETH Zürichxxxhttps://ror.org/05a28rw58xx
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07xx
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308xx
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741xx
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379xx
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17xx
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43xx
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751xx
13HEP Vaudxxxhttps://ror.org/01bvm0h13xx
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04xx
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48xx
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06xx
17Kalaidosxxxhttps://ror.org/049c2kr37xx
19MediNaNxNaNNaNxx
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779xx
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660xx
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221xx
23PH Bernxxxhttps://ror.org/05jf1ma54xx
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97xx
25PH Luzernxxxhttps://ror.org/0235ynq74xx
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10xx
27PH Schwyzxxxhttps://ror.org/00rqdn375xx
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666xx
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94xx
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06xx
31PH Zugxxxhttps://ror.org/05ghhx264xx
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76xx
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269xx
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17xx
35Universität Baselxxxhttps://ror.org/02s6k3f65xx
36Universität Bernxxxhttps://ror.org/02k7v4d05xx
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392xx
38Universität Luzernxxxhttps://ror.org/00kgrkn83xx
39Universität St. Gallenxxxhttps://ror.org/0561a3s31xx
40Universität Zürichxxxhttps://ror.org/02crff812xx
41Université de Fribourgxxxhttps://ror.org/022fs9h90xx
42Université de Genèvexxxhttps://ror.org/01swzsf04xx
43Université de Lausannexxxhttps://ror.org/019whta54xx
44Université de Neuchâtelxxxhttps://ror.org/00vasag41xx
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620xx
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27xx
-
- - - - -```python -# ouvrir la liste des journaux Elsevier -elsevier = pd.read_excel('agreements/Elsevier_titlelist_publication.xlsx', skiprows=7) -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSN
0Academic Pediatrics1876-2859
1Accident Analysis and Prevention0001-4575
2Accounting, Organizations and Society0361-3682
3Acta Astronautica0094-5765
4Acta Biomaterialia1742-7061
.........
2240Wound Medicine2213-9095
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217
2242Zeitschrift fuer Medizinische Physik0939-3889
2243Zoologischer Anzeiger0044-5231
2244Zoology0944-2006
-

2245 rows × 2 columns

-
- - - - -```python -# ajout du champ version -elsevier['article_version'] = 'published' -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_version
0Academic Pediatrics1876-2859published
1Accident Analysis and Prevention0001-4575published
2Accounting, Organizations and Society0361-3682published
3Acta Astronautica0094-5765published
4Acta Biomaterialia1742-7061published
............
2240Wound Medicine2213-9095published
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published
2242Zeitschrift fuer Medizinische Physik0939-3889published
2243Zoologischer Anzeiger0044-5231published
2244Zoology0944-2006published
-

2245 rows × 3 columns

-
- - - - -```python -# ajout des dates -elsevier['valid_from'] = '2020-01-01' -elsevier['valid_until'] = '2023-12-31' -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_until
0Academic Pediatrics1876-2859published2020-01-012023-12-31
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-31
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-31
3Acta Astronautica0094-5765published2020-01-012023-12-31
4Acta Biomaterialia1742-7061published2020-01-012023-12-31
..................
2240Wound Medicine2213-9095published2020-01-012023-12-31
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-31
2242Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-31
2243Zoologischer Anzeiger0044-5231published2020-01-012023-12-31
2244Zoology0944-2006published2020-01-012023-12-31
-

2245 rows × 5 columns

-
- - - - -```python -# ajout du embargo et archiving -elsevier['embargo_months'] = 0 -elsevier['archiving'] = True -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_untilembargo_monthsarchiving
0Academic Pediatrics1876-2859published2020-01-012023-12-310True
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-310True
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-310True
3Acta Astronautica0094-5765published2020-01-012023-12-310True
4Acta Biomaterialia1742-7061published2020-01-012023-12-310True
........................
2240Wound Medicine2213-9095published2020-01-012023-12-310True
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-310True
2242Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-310True
2243Zoologischer Anzeiger0044-5231published2020-01-012023-12-310True
2244Zoology0944-2006published2020-01-012023-12-310True
-

2245 rows × 7 columns

-
- - - - -```python -elsevier.iloc[elsevier.shape[0]-1] -``` - - - - - Title Zoology - ISSN 0944-2006 - article_version published - valid_from 2020-01-01 - valid_until 2023-12-31 - embargo_months 0 - archiving True - Name: 2244, dtype: object - - - - -```python -# ajout du champ license -# cc_by, cc_by_nc_nd -rp = pd.DataFrame() -elsevier['article_version'] = 'published' -elsevier['license'] = 'cc_by' -elsevier['Elsevier'] = 'x' -rp = rp.append(elsevier, ignore_index=True) -elsevier['license'] = 'cc_by_nc_nd' -rp = rp.append(elsevier, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_untilembargo_monthsarchivinglicenseElsevier
0Academic Pediatrics1876-2859published2020-01-012023-12-310Truecc_byx
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-310Truecc_byx
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-310Truecc_byx
3Acta Astronautica0094-5765published2020-01-012023-12-310Truecc_byx
4Acta Biomaterialia1742-7061published2020-01-012023-12-310Truecc_byx
..............................
4485Wound Medicine2213-9095published2020-01-012023-12-310Truecc_by_nc_ndx
4486Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-310Truecc_by_nc_ndx
4487Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-310Truecc_by_nc_ndx
4488Zoologischer Anzeiger0044-5231published2020-01-012023-12-310Truecc_by_nc_ndx
4489Zoology0944-2006published2020-01-012023-12-310Truecc_by_nc_ndx
-

4490 rows × 9 columns

-
- - - - -```python -# ouvrir la liste des journaux Springer Nature -springer = pd.read_excel('agreements/Springer_titlelist_publication.xlsx', skiprows=7) -springer -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
03 Biotech2190-5738https://www.springer.com/journal/13205
14OR1614-2411https://www.springer.com/journal/10288
2AAPS PharmSciTech1530-9932https://www.springer.com/journal/12249
3Abdominal Radiology2366-0058https://www.springer.com/journal/261
4Abhandlungen aus dem Mathematischen Seminar de...1865-8784https://www.springer.com/journal/12188
............
2035Zeitschrift für Religion, Gesellschaft und Pol...2510-1226https://www.springer.com/journal/41682
2036Zeitschrift für Rheumatologie1435-1250https://www.springer.com/journal/393
2037Zeitschrift für Vergleichende Politikwissenschaft1865-2654https://www.springer.com/journal/12286
2038Zentralblatt für Arbeitsmedizin, Arbeitsschutz...2198-0713https://www.springer.com/journal/40664
2039Zoomorphology1432-234Xhttps://www.springer.com/journal/435
-

2040 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc -springer['article_version'] = 'published' -springer['license'] = 'cc_by' -springer['Springer Nature'] = 'x' -# ajout des dates -springer['valid_from'] = '2020-01-01' -springer['valid_until'] = '2022-12-31' -# ajout du embargo et archiving -springer['embargo_months'] = 0 -springer['archiving'] = True -``` - - -```python -# append -rp = rp.append(springer, ignore_index=True) -springer['license'] = 'cc_by_nc' -rp = rp.append(springer, ignore_index=True) -rp -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py:7123: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version - of pandas will change to not sort by default. - - To accept the future behavior, pass 'sort=False'. - - To retain the current behavior and silence the warning, pass 'sort=True'. - - sort=sort, - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTitleURLarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNAcademic PediatricsNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNAccident Analysis and PreventionNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNAccounting, Organizations and SocietyNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNActa AstronauticaNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNActa BiomaterialiaNaNTruepublished0cc_by2020-01-012023-12-31
....................................
8565NaN2510-1226xZeitschrift für Religion, Gesellschaft und Pol...https://www.springer.com/journal/41682Truepublished0cc_by_nc2020-01-012022-12-31
8566NaN1435-1250xZeitschrift für Rheumatologiehttps://www.springer.com/journal/393Truepublished0cc_by_nc2020-01-012022-12-31
8567NaN1865-2654xZeitschrift für Vergleichende Politikwissenschafthttps://www.springer.com/journal/12286Truepublished0cc_by_nc2020-01-012022-12-31
8568NaN2198-0713xZentralblatt für Arbeitsmedizin, Arbeitsschutz...https://www.springer.com/journal/40664Truepublished0cc_by_nc2020-01-012022-12-31
8569NaN1432-234XxZoomorphologyhttps://www.springer.com/journal/435Truepublished0cc_by_nc2020-01-012022-12-31
-

8570 rows × 11 columns

-
- - - - -```python -# Load the Wiley journal title list; the first 7 rows of the sheet are skipped. -wiley = pd.read_excel('agreements/Wiley_titlelist_publish.xlsx', skiprows=7) -wiley -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
0ABACUS1467-6281https://onlinelibrary.wiley.com/journal/14676281
1ACADEMIC EMERGENCY MEDICINE1553-2712https://onlinelibrary.wiley.com/journal/15532712
2ACCOUNTING & FINANCE1467-629Xhttps://onlinelibrary.wiley.com/journal/1467629X
3ACCOUNTING PERSPECTIVES1911-3838https://onlinelibrary.wiley.com/journal/19113838
4ACTA ANAESTHESIOLOGICA SCANDINAVICA1399-6576https://onlinelibrary.wiley.com/journal/13996576
............
1391ZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...1521-3749https://onlinelibrary.wiley.com/journal/15213749
1392ZOO BIOLOGY1098-2361https://onlinelibrary.wiley.com/journal/10982361
1393ZOOLOGICA SCRIPTA1463-6409https://onlinelibrary.wiley.com/journal/14636409
1394ZOONOSES AND PUBLIC HEALTH1863-2378https://onlinelibrary.wiley.com/journal/18632378
1395ZYGON® JOURNAL OF RELIGION AND SCIENCE1467-9744https://onlinelibrary.wiley.com/journal/14679744
-

1396 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -wiley['article_version'] = 'published' -wiley['license'] = 'cc_by' -wiley['Wiley'] = 'x' -# ajout des dates -wiley['valid_from'] = '2021-01-01' -wiley['valid_until'] = '2024-12-31' -# ajout du embargo et archiving -wiley['embargo_months'] = 0 -wiley['archiving'] = True -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc' -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc_nd' -rp = rp.append(wiley, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
.......................................
12753NaN1521-3749NaNZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...https://onlinelibrary.wiley.com/journal/15213749xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12754NaN1098-2361NaNZOO BIOLOGYhttps://onlinelibrary.wiley.com/journal/10982361xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12755NaN1463-6409NaNZOOLOGICA SCRIPTAhttps://onlinelibrary.wiley.com/journal/14636409xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12756NaN1863-2378NaNZOONOSES AND PUBLIC HEALTHhttps://onlinelibrary.wiley.com/journal/18632378xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12757NaN1467-9744NaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-31
-

12758 rows × 12 columns

-
- - - - -```python -# Load the Taylor & Francis journal title list; the first 7 rows of the sheet are skipped. -# NOTE(review): the displayed output contains at least one title with no ISSN (NaN) - confirm how such rows should be handled downstream. -tf = pd.read_excel('agreements/TandF_titlelist_publish.xlsx', skiprows=7) -tf -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSN
0a/b: Auto/Biography Studies2151-7290
1Accountability in Research1545-5815
2Accounting and Business Research2159-4260
3Accounting Education1468-4489
4Accounting Forum1467-6303
.........
2401Writing Systems ResearchNaN
2402Xenobiotica1366-5928
2403Yorkshire Archaeological Journal2045-0664
2404Youth Theatre Journal1948-4798
2405Zoology in the Middle East2326-2680
-

2406 rows × 2 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -tf['article_version'] = 'published' -tf['license'] = 'cc_by' -tf['TF'] = 'x' -# ajout des dates -tf['valid_from'] = '2021-01-01' -tf['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -tf['embargo_months'] = 0 -tf['archiving'] = True -``` - - -```python -# append -rp = rp.append(tf, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
..........................................
15159NaNNaNNaNxWriting Systems ResearchNaNNaNTruepublished0cc_by2021-01-012023-12-31
15160NaN1366-5928NaNxXenobioticaNaNNaNTruepublished0cc_by2021-01-012023-12-31
15161NaN2045-0664NaNxYorkshire Archaeological JournalNaNNaNTruepublished0cc_by2021-01-012023-12-31
15162NaN1948-4798NaNxYouth Theatre JournalNaNNaNTruepublished0cc_by2021-01-012023-12-31
15163NaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-31
-

15164 rows × 13 columns

-
- - - - -```python -# Load the Cambridge University Press (CUP) journal title list; the first 7 rows of the sheet are skipped. -cup = pd.read_excel('agreements/CUP_Journals_titlelist_publish.xlsx', skiprows=7) -cup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Titlee-ISSNURL
0Agricultural and Resource Economics Review2372-2614http://www.cambridge.org/core/product/identifi...
1AJIL Unbound2398-7723http://www.cambridge.org/core/product/identifi...
2Annals of Glaciology1727-5644http://www.cambridge.org/core/product/identifi...
3APSIPA Transactions on Signal and Information ...2048-7703http://www.cambridge.org/core/product/identifi...
4Biological Imaging2633-903Xhttp://www.cambridge.org/core/product/identifi...
............
366Visual Neuroscience1469-8714http://www.cambridge.org/core/product/identifi...
367Weed Science1550-2759http://www.cambridge.org/core/product/identifi...
368Weed Technology1550-2740http://www.cambridge.org/core/product/identifi...
369World Trade Review1475-3138http://www.cambridge.org/core/product/identifi...
370Zygote1469-8730http://www.cambridge.org/core/product/identifi...
-

371 rows × 3 columns

-
- - - - -```python -# Rename the CUP 'e-ISSN' column to 'ISSN', the column name used by the other publisher lists in rp. -cup = cup.rename(columns = {'e-ISSN' : 'ISSN'}) -cup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
0Agricultural and Resource Economics Review2372-2614http://www.cambridge.org/core/product/identifi...
1AJIL Unbound2398-7723http://www.cambridge.org/core/product/identifi...
2Annals of Glaciology1727-5644http://www.cambridge.org/core/product/identifi...
3APSIPA Transactions on Signal and Information ...2048-7703http://www.cambridge.org/core/product/identifi...
4Biological Imaging2633-903Xhttp://www.cambridge.org/core/product/identifi...
............
366Visual Neuroscience1469-8714http://www.cambridge.org/core/product/identifi...
367Weed Science1550-2759http://www.cambridge.org/core/product/identifi...
368Weed Technology1550-2740http://www.cambridge.org/core/product/identifi...
369World Trade Review1475-3138http://www.cambridge.org/core/product/identifi...
370Zygote1469-8730http://www.cambridge.org/core/product/identifi...
-

371 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd, cc_by_nc_sa -cup['article_version'] = 'published' -cup['license'] = 'cc_by' -cup['CUP'] = 'x' -# ajout des dates -cup['valid_from'] = '2021-01-01' -cup['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -cup['embargo_months'] = 60 -cup['archiving'] = True -``` - - -```python -# append -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_nd' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_sa' -rp = rp.append(cup, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
.............................................
16643xNaN1469-8714NaNNaNVisual Neurosciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16644xNaN1550-2759NaNNaNWeed Sciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16645xNaN1550-2740NaNNaNWeed Technologyhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16646xNaN1475-3138NaNNaNWorld Trade Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16647xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
-

16648 rows × 14 columns

-
- - - - -```python -# Sanity check: list the rows of rp that have no embargo value (the captured output is empty). -rp.loc[rp['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
-
- - - - -```python -# Load the ISSN to ISSN-L mapping table (tab-separated text file, header on the first row). -issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# Rename the columns so that both frames share the lower-case join key 'issn'. -issnl = issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -rp = rp.rename(columns={'ISSN' : 'issn'}) -``` - - -```python -# Left merge: every rp row is kept and gains an 'issnl' column looked up by 'issn'. -rp = pd.merge(rp, issnl, on='issn', how='left') -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-310001-4575
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-310361-3682
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-310094-5765
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061
................................................
16643xNaN1469-8714NaNNaNVisual Neurosciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310952-5238
16644xNaN1550-2759NaNNaNWeed Sciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310043-1745
16645xNaN1550-2740NaNNaNWeed Technologyhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310890-037X
16646xNaN1475-3138NaNNaNWorld Trade Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311474-7456
16647xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994
-

16648 rows × 15 columns

-
- - - - -```python -# Stack the ISSN and ISSN-L variants for the merge (disabled: rp_all is simply another name for rp, not a copy). -# rp_1 = rp.loc[rp['issnl'].notna()][['issnl', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_1 = rp_1.rename(columns = {'issnl' : 'issn'}) -# rp_2 = rp.loc[rp['issn'].notna()][['issn', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_all = rp_1.append(rp_2, ignore_index=True) -rp_all = rp -``` - - -```python -# Add the missing fields (disabled cell). -# Discount value (id 2) of 100% for the read & publish licences. -# elsevier['amount'] = 100 -# elsevier['symbol'] = '%' -# elsevier['cost_factor_type'] = 2 -# elsevier['comment'] = 'Source: swissuniversities' -# elsevier -``` - - -```python -# Merge with the organisations, one publisher column at a time: -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -# Both sides keep only rows where 'Elsevier' is set; joining on that marker column pairs -# each journal row with each participant ROR (presumably the marker is always 'x' - verify in participants). -participants_elsevier = participants.loc[participants['Elsevier'].notna()][['Elsevier', 'ROR']] -rp_elsevier = rp_all.loc[rp_all['Elsevier'].notna()] -rp_1 = pd.merge(rp_elsevier, participants_elsevier, on='Elsevier', how='outer') -rp_1 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
197555NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/01swzsf04
197556NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/019whta54
197557NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/00vasag41
197558NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/05r0ap620
197559NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/05pmsvm27
-

197560 rows × 16 columns

-
- - - - -```python -# Display the Elsevier journal rows used as the left side of the merge above. -rp_elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-310001-4575
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-310361-3682
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-310094-5765
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061
................................................
4485NaNx2213-9095NaNNaNWound MedicineNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-312213-9095
4486NaNx1865-9217NaNNaNZeitschrift fuer Evidenz, Fortbildung und Qual...NaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-311865-9217
4487NaNx0939-3889NaNNaNZeitschrift fuer Medizinische PhysikNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310939-3889
4488NaNx0044-5231NaNNaNZoologischer AnzeigerNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310044-5231
4489NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006
-

4490 rows × 15 columns

-
- - - - -```python -# Display the participants (marker column and ROR) used as the right side of the Elsevier merge. -participants_elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierROR
0xhttps://ror.org/04d8ztx87
1xhttps://ror.org/02bnkt322
3xhttps://ror.org/00zg4za48
4xhttps://ror.org/02s376052
5xhttps://ror.org/05a28rw58
6xhttps://ror.org/032ymzc07
7xhttps://ror.org/04mq2g308
8xhttps://ror.org/0210tb741
9xhttps://ror.org/007ygn379
10xhttps://ror.org/01xkakk17
11xhttps://ror.org/015pmkr43
12xhttps://ror.org/048gre751
13xhttps://ror.org/01bvm0h13
14xhttps://ror.org/02ejkey04
15xhttps://ror.org/04nd0xd48
16xhttps://ror.org/00w9q2c06
17xhttps://ror.org/049c2kr37
20xhttps://ror.org/00p9jf779
21xhttps://ror.org/038mj2660
22xhttps://ror.org/01awgk221
23xhttps://ror.org/05jf1ma54
24xhttps://ror.org/02fjgft97
25xhttps://ror.org/0235ynq74
26xhttps://ror.org/03fs41j10
27xhttps://ror.org/00rqdn375
28xhttps://ror.org/05m37v666
29xhttps://ror.org/04bf6dq94
30xhttps://ror.org/040gs8e06
31xhttps://ror.org/05ghhx264
32xhttps://ror.org/03mcsbr76
33xhttps://ror.org/05ep8g269
34xhttps://ror.org/03c4atk17
35xhttps://ror.org/02s6k3f65
36xhttps://ror.org/02k7v4d05
37xhttps://ror.org/01qjrx392
38xhttps://ror.org/00kgrkn83
39xhttps://ror.org/0561a3s31
40xhttps://ror.org/02crff812
41xhttps://ror.org/022fs9h90
42xhttps://ror.org/01swzsf04
43xhttps://ror.org/019whta54
44xhttps://ror.org/00vasag41
45xhttps://ror.org/05r0ap620
46xhttps://ror.org/05pmsvm27
-
- - - - -```python -# Merge with the organisations, one publisher column at a time: -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -# Both sides keep only rows where 'Springer Nature' is set; joining on that marker column pairs -# each journal row with each participant ROR (presumably the marker is always 'x' - verify in participants). -participants_springer = participants.loc[participants['Springer Nature'].notna()][['Springer Nature', 'ROR']] -rp_springer = rp_all.loc[rp_all['Springer Nature'].notna()] -rp_2 = pd.merge(rp_springer, participants_springer, on='Springer Nature', how='outer') -rp_2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/04d8ztx87
1NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/02bnkt322
2NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/01ggx4157
3NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/00zg4za48
4NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/02s376052
...................................................
187675NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/01swzsf04
187676NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/019whta54
187677NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/00vasag41
187678NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/05r0ap620
187679NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/05pmsvm27
-

187680 rows × 16 columns

-
- - - - -```python -# Merge with the organisations, one publisher column at a time: -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -# Both sides keep only rows where 'Wiley' is set; joining on that marker column pairs -# each journal row with each participant ROR (presumably the marker is always 'x' - verify in participants). -participants_wiley = participants.loc[participants['Wiley'].notna()][['Wiley', 'ROR']] -rp_wiley = rp_all.loc[rp_all['Wiley'].notna()] -rp_3 = pd.merge(rp_wiley, participants_wiley, on='Wiley', how='outer') -rp_3 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/04d8ztx87
1NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/02bnkt322
2NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/01ggx4157
3NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/00zg4za48
4NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/02s376052
...................................................
188455NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/01swzsf04
188456NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/019whta54
188457NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/00vasag41
188458NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/05r0ap620
188459NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/05pmsvm27
-

188460 rows × 16 columns

-
- - - - -```python -# Display the Wiley journal rows used as the left side of the merge above. -rp_wiley -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
8570NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072
8571NaNNaN1553-2712NaNNaNACADEMIC EMERGENCY MEDICINEhttps://onlinelibrary.wiley.com/journal/15532712xTruepublished0cc_by2021-01-012024-12-311069-6563
8572NaNNaN1467-629XNaNNaNACCOUNTING & FINANCEhttps://onlinelibrary.wiley.com/journal/1467629XxTruepublished0cc_by2021-01-012024-12-310810-5391
8573NaNNaN1911-3838NaNNaNACCOUNTING PERSPECTIVEShttps://onlinelibrary.wiley.com/journal/19113838xTruepublished0cc_by2021-01-012024-12-311911-382X
8574NaNNaN1399-6576NaNNaNACTA ANAESTHESIOLOGICA SCANDINAVICAhttps://onlinelibrary.wiley.com/journal/13996576xTruepublished0cc_by2021-01-012024-12-310001-5172
................................................
12753NaNNaN1521-3749NaNNaNZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...https://onlinelibrary.wiley.com/journal/15213749xTruepublished0cc_by_nc_nd2021-01-012024-12-310044-2313
12754NaNNaN1098-2361NaNNaNZOO BIOLOGYhttps://onlinelibrary.wiley.com/journal/10982361xTruepublished0cc_by_nc_nd2021-01-012024-12-310733-3188
12755NaNNaN1463-6409NaNNaNZOOLOGICA SCRIPTAhttps://onlinelibrary.wiley.com/journal/14636409xTruepublished0cc_by_nc_nd2021-01-012024-12-310300-3256
12756NaNNaN1863-2378NaNNaNZOONOSES AND PUBLIC HEALTHhttps://onlinelibrary.wiley.com/journal/18632378xTruepublished0cc_by_nc_nd2021-01-012024-12-311863-1959
12757NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385
-

4188 rows × 15 columns

-
- - - - -```python -# Merge with the organisations, one publisher column at a time: -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -# Both sides keep only rows where 'TF' is set; joining on that marker column pairs -# each journal row with each participant ROR (presumably the marker is always 'x' - verify in participants). -participants_tf = participants.loc[participants['TF'].notna()][['TF', 'ROR']] -rp_tf = rp_all.loc[rp_all['TF'].notna()] -rp_4 = pd.merge(rp_tf, participants_tf, on='TF', how='outer') -rp_4 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/04d8ztx87
1NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/02bnkt322
2NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/01ggx4157
3NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/00zg4za48
4NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/02s376052
...................................................
110671NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/01swzsf04
110672NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/019whta54
110673NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/00vasag41
110674NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/05r0ap620
110675NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/05pmsvm27
-

110676 rows × 16 columns

-
- - - - -```python -# Merge with the organisations, one publisher column at a time: -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -# Both sides keep only rows where 'CUP' is set; joining on that marker column pairs -# each journal row with each participant ROR (presumably the marker is always 'x' - verify in participants). -participants_cup = participants.loc[participants['CUP'].notna()][['CUP', 'ROR']] -rp_cup = rp_all.loc[rp_all['CUP'].notna()] -rp_5 = pd.merge(rp_cup, participants_cup, on='CUP', how='outer') -rp_5 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/04d8ztx87
1xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/02bnkt322
2xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/01ggx4157
3xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/00zg4za48
4xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/02s376052
...................................................
68259xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
68260xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
68261xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
68262xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
68263xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

68264 rows × 16 columns

-
- - - - -```python -# concat des 5 -rp_fin = rp_1.append(rp_2, ignore_index=True) -rp_fin = rp_fin.append(rp_3, ignore_index=True) -rp_fin = rp_fin.append(rp_4, ignore_index=True) -rp_fin = rp_fin.append(rp_5, ignore_index=True) -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
752635xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
752636xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
752637xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
752638xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
752639xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

752640 rows × 16 columns

-
- - - - -```python -# supprimer les doublons et les vides -rp_fin = rp_fin.dropna(subset=['issn']) -rp_fin = rp_fin.drop_duplicates(subset=['issn', 'license', 'ROR']) -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
752635xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
752636xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
752637xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
752638xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
752639xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

751628 rows × 16 columns

-
- - - - -```python -# reindex et ajout de l'id avec l'index + 1 -rp_fin = rp_fin.reset_index() -del rp_fin['index'] -rp_fin = rp_fin.reset_index() -rp_fin['rp_id'] = rp_fin.index + 1 -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_id
00NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx871
11NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt3222
22NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za483
33NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s3760524
44NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw585
.........................................................
751623751623xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04751624
751624751624xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54751625
751625751625xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41751626
751626751626xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620751627
751627751627xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27751628
-

751628 rows × 18 columns

-
- - - - -```python -rp_fin['embargo_months'].value_counts() -``` - - - - - 0 683364 - 60 68264 - Name: embargo_months, dtype: int64 - - - - -```python -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_id
-
- - - - -```python -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# merge pour avoir l'issnl -issn = pd.merge(issn, issnl, on='issn', how='left') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_typeissnl
010001-281553210001-2815
121399-003953220001-2815
230001-484249810001-4842
341520-489849820001-4842
450001-496678910001-4966
..................
175517562470-004553332470-0045
175617572470-005353322470-0045
175717582475-995360822475-9953
175817592504-442799412504-4427
175917602504-443599432504-4427
-

1760 rows × 5 columns

-
- - - - -```python -issn.loc[issn['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - -
idissnjournalissn_typeissnl
-
- - - - -```python -# merge dans l'autre sens pour garder que les lignes du fichier -rp_fin = pd.merge(rp_fin, issn[['id', 'journal', 'issnl']], on='issnl', how='left') -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
00NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx871NaNNaN
11NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt3222NaNNaN
22NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za483NaNNaN
33NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s3760524NaNNaN
44NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw585NaNNaN
...............................................................
792211751623xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04751624NaNNaN
792212751624xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54751625NaNNaN
792213751625xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41751626NaNNaN
792214751626xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620751627NaNNaN
792215751627xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27751628NaNNaN
-

792216 rows × 20 columns

-
- - - - -```python -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna() & rp_fin['id'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
-
- - - - -```python -# garder les lignes avec merge -rp_fin_merge = rp_fin.loc[rp_fin['id'].notna()] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
176176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771623.0899.0
177176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771624.0899.0
178177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781623.0899.0
179177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781624.0899.0
180178NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za481791623.0899.0
...............................................................
788071747485xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag417474861419.0592.0
788072747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871418.0592.0
788073747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871419.0592.0
788074747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881418.0592.0
788075747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881419.0592.0
-

80671 rows × 20 columns

-
- - - - -```python -# supprimer les doublons et les vides -rp_fin_merge = rp_fin_merge.drop_duplicates(subset=['rp_id']) -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
176176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771623.0899.0
178177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781623.0899.0
180178NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za481791623.0899.0
182179NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s3760521801623.0899.0
184180NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw581811623.0899.0
...............................................................
788066747483xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf047474841418.0592.0
788068747484xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta547474851418.0592.0
788070747485xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag417474861418.0592.0
788072747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871418.0592.0
788074747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881418.0592.0
-

40083 rows × 20 columns

-
- - - - -```python -# test des lignes sans journal -rp_fin_merge.loc[rp_fin_merge['journal'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
-
- - - - -```python -# convertir l'index en id -del rp_fin_merge['id'] -del rp_fin_merge['index'] -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.0177
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.0179
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.0181
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.0183
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.0185
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.0788067
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.0788069
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.0788071
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.0788073
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.0788075
-

40083 rows × 18 columns

-
- - - - -```python -# convertir l'index en id -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083
-

40083 rows × 18 columns

-
- - - - -```python -rp_fin_merge['embargo_months'].value_counts() -``` - - - - - 0 39163 - 60 920 - Name: embargo_months, dtype: int64 - - - - -```python -# test des lignes sans embargo -rp_fin_merge.loc[rp_fin_merge['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
-
- - - - -```python -# export excel -rp_fin_merge.to_excel('sample/read_publish_brut_merge.xlsx', index=False) -``` - - -```python -# export csv -rp_fin_merge.to_csv('sample/read_publish_brut_merge.tsv', sep='\t', index=False) -``` diff --git a/import_scripts/09_oacct_read_and_publish.py b/import_scripts/09_oacct_read_and_publish.py deleted file mode 100644 index 98ff4da0..00000000 --- a/import_scripts/09_oacct_read_and_publish.py +++ /dev/null @@ -1,607 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. -# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 08.09.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 - - -# ## Ajout des rabais pour les revues des licences Read & Publish -# -# Journals list by publisher : -# * https://consortium.ch/elsevier_titlelist_publication -# * https://consortium.ch/springer_titlelist_publication -# * https://consortium.ch/wiley_titlelist_publish -# * https://consortium.ch/tandf_titlelist_publish -# * https://consortium.ch/sage_titlelist_publish -# * https://consortium.ch/cup_titlelist_publish -# -# Licence term : -# * Elsevier : 2020-2023 -# * Springer Nature : 2020-2022 -# * Wiley : 2021-2024 -# * Taylor & Francis : 2021-2023 -# * Cambridge University Press (CUP) : 2021-2023 -# -# CC licences : -# * Elsevier : CC-BY, CC-BY-NC-ND -# * Springer Nature : CC-BY, CC-BY-NC 
-# * Wiley : CC-BY, CC-BY-NC, CC-BY-NC-ND -# * Taylor & Francis : CC-BY -# * Cambridge University Press (CUP) : CC-BY, CC-BY-NC, CC-BY-NC-ND, CC-BY-NC-SA -# -# Special conditions : -# * Cambridge University Press (CUP) : Only the following article types are covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports -# -# - -# ## Import du fichier des issns - -# In[2]: - - -issn = pd.read_csv('sample/issn.tsv', encoding='utf-8', header=0, sep='\t') -issn - - -# In[3]: - - -# open publishers -publisher = pd.read_csv('sample/publisher.tsv', encoding='utf-8', header=0, sep='\t') -publisher - - -# In[4]: - - -publisher.loc[publisher['name'] == 'Elsevier'] - - -# In[5]: - - -publisher.loc[(publisher['name'] == 'Springer Verlag') | (publisher['name'] == 'Nature Research')] - - -# In[6]: - - -publisher.loc[publisher['name'] == 'Wiley'] - - -# In[7]: - - -publisher.loc[publisher['name'] == 'Taylor and Francis'] - - -# In[8]: - - -publisher.loc[publisher['name'] == 'Cambridge University Press'] - - -# In[9]: - - -# ouvrir la liste d'organisations -participants = pd.read_csv('agreements/consortium_institutions_participation_read_and_publish.csv', encoding='utf-8', header=0, sep='\t') -participants - - -# In[10]: - - -# suppression de Lib4RI qui est une bibliothèque -participants = participants.loc[participants['Institution'] != 'Lib4RI'] -participants - - -# In[11]: - - -# ajout de TF et CUP pour tous (TODO : obtenir la liste des bibliothèques pour ces deux licences) -participants['TF'] = 'x' -participants['CUP'] = 'x' -participants - - -# In[12]: - - -# ouvrir la liste des journaux Elsevier -elsevier = pd.read_excel('agreements/Elsevier_titlelist_publication.xlsx', skiprows=7) -elsevier - - -# In[13]: - - -# ajout du champ version -elsevier['article_version'] = 'published' -elsevier - - -# In[14]: - - -# ajout des dates -elsevier['valid_from'] = '2020-01-01' -elsevier['valid_until'] = '2023-12-31' -elsevier - - -# In[15]: - - -# ajout du 
embargo et archiving -elsevier['embargo_months'] = 0 -elsevier['archiving'] = True -elsevier - - -# In[16]: - - -elsevier.iloc[elsevier.shape[0]-1] - - -# In[17]: - - -# ajout du champ license -# cc_by, cc_by_nc_nd -rp = pd.DataFrame() -elsevier['article_version'] = 'published' -elsevier['license'] = 'cc_by' -elsevier['Elsevier'] = 'x' -rp = rp.append(elsevier, ignore_index=True) -elsevier['license'] = 'cc_by_nc_nd' -rp = rp.append(elsevier, ignore_index=True) -rp - - -# In[18]: - - -# ouvrir la liste des journaux Springer Nature -springer = pd.read_excel('agreements/Springer_titlelist_publication.xlsx', skiprows=7) -springer - - -# In[19]: - - -# ajout du champ license -# cc_by, cc_by_nc -springer['article_version'] = 'published' -springer['license'] = 'cc_by' -springer['Springer Nature'] = 'x' -# ajout des dates -springer['valid_from'] = '2020-01-01' -springer['valid_until'] = '2022-12-31' -# ajout du embargo et archiving -springer['embargo_months'] = 0 -springer['archiving'] = True - - -# In[20]: - - -# append -rp = rp.append(springer, ignore_index=True) -springer['license'] = 'cc_by_nc' -rp = rp.append(springer, ignore_index=True) -rp - - -# In[21]: - - -# ouvrir la liste des journaux Wiley -wiley = pd.read_excel('agreements/Wiley_titlelist_publish.xlsx', skiprows=7) -wiley - - -# In[22]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -wiley['article_version'] = 'published' -wiley['license'] = 'cc_by' -wiley['Wiley'] = 'x' -# ajout des dates -wiley['valid_from'] = '2021-01-01' -wiley['valid_until'] = '2024-12-31' -# ajout du embargo et archiving -wiley['embargo_months'] = 0 -wiley['archiving'] = True -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc' -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc_nd' -rp = rp.append(wiley, ignore_index=True) -rp - - -# In[23]: - - -# ouvrir la liste des journaux TF -tf = 
pd.read_excel('agreements/TandF_titlelist_publish.xlsx', skiprows=7) -tf - - -# In[24]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -tf['article_version'] = 'published' -tf['license'] = 'cc_by' -tf['TF'] = 'x' -# ajout des dates -tf['valid_from'] = '2021-01-01' -tf['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -tf['embargo_months'] = 0 -tf['archiving'] = True - - -# In[25]: - - -# append -rp = rp.append(tf, ignore_index=True) -rp - - -# In[26]: - - -# ouvrir la liste des journaux CUP -cup = pd.read_excel('agreements/CUP_Journals_titlelist_publish.xlsx', skiprows=7) -cup - - -# In[27]: - - -# renommer l'ISSN -cup = cup.rename(columns = {'e-ISSN' : 'ISSN'}) -cup - - -# In[28]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd, cc_by_nc_sa -cup['article_version'] = 'published' -cup['license'] = 'cc_by' -cup['CUP'] = 'x' -# ajout des dates -cup['valid_from'] = '2021-01-01' -cup['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -cup['embargo_months'] = 60 -cup['archiving'] = True - - -# In[29]: - - -# append -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_nd' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_sa' -rp = rp.append(cup, ignore_index=True) -rp - - -# In[30]: - - -# test des lignes sans embargo -rp.loc[rp['embargo_months'].isna()] - - -# In[31]: - - -# ajout des ISSN-L -issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issnl - - -# In[32]: - - -# renommer les colonnes -issnl = issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -rp = rp.rename(columns={'ISSN' : 'issn'}) - - -# In[33]: - - -# merge -rp = pd.merge(rp, issnl, on='issn', how='left') -rp - - -# In[34]: - - -# cummuler les issns pour le merge -# rp_1 = rp.loc[rp['issnl'].notna()][['issnl', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_1 = 
rp_1.rename(columns = {'issnl' : 'issn'}) -# rp_2 = rp.loc[rp['issn'].notna()][['issn', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_all = rp_1.append(rp_2, ignore_index=True) -rp_all = rp - - -# In[35]: - - -# ajouter les champs manquants -# valeur discount (id 2) à 100% pour les licences read & publish -# elsevier['amount'] = 100 -# elsevier['symbol'] = '%' -# elsevier['cost_factor_type'] = 2 -# elsevier['comment'] = 'Source: swissuniversities' -# elsevier - - -# In[36]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_elsevier = participants.loc[participants['Elsevier'].notna()][['Elsevier', 'ROR']] -rp_elsevier = rp_all.loc[rp_all['Elsevier'].notna()] -rp_1 = pd.merge(rp_elsevier, participants_elsevier, on='Elsevier', how='outer') -rp_1 - - -# In[37]: - - -rp_elsevier - - -# In[38]: - - -participants_elsevier - - -# In[39]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_springer = participants.loc[participants['Springer Nature'].notna()][['Springer Nature', 'ROR']] -rp_springer = rp_all.loc[rp_all['Springer Nature'].notna()] -rp_2 = pd.merge(rp_springer, participants_springer, on='Springer Nature', how='outer') -rp_2 - - -# In[40]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_wiley = participants.loc[participants['Wiley'].notna()][['Wiley', 'ROR']] -rp_wiley = rp_all.loc[rp_all['Wiley'].notna()] -rp_3 = pd.merge(rp_wiley, participants_wiley, on='Wiley', how='outer') -rp_3 - - -# In[41]: - - -rp_wiley - - -# In[42]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_tf = participants.loc[participants['TF'].notna()][['TF', 'ROR']] -rp_tf = rp_all.loc[rp_all['TF'].notna()] -rp_4 = pd.merge(rp_tf, participants_tf, on='TF', how='outer') -rp_4 - - -# In[43]: - - -# merge avec les organisations -# 
'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_cup = participants.loc[participants['CUP'].notna()][['CUP', 'ROR']] -rp_cup = rp_all.loc[rp_all['CUP'].notna()] -rp_5 = pd.merge(rp_cup, participants_cup, on='CUP', how='outer') -rp_5 - - -# In[44]: - - -# concat des 5 -rp_fin = rp_1.append(rp_2, ignore_index=True) -rp_fin = rp_fin.append(rp_3, ignore_index=True) -rp_fin = rp_fin.append(rp_4, ignore_index=True) -rp_fin = rp_fin.append(rp_5, ignore_index=True) -rp_fin - - -# In[45]: - - -# supprimer les doublons et les vides -rp_fin = rp_fin.dropna(subset=['issn']) -rp_fin = rp_fin.drop_duplicates(subset=['issn', 'license', 'ROR']) -rp_fin - - -# In[46]: - - -# reindex et ajout de l'id avec l'index + 1 -rp_fin = rp_fin.reset_index() -del rp_fin['index'] -rp_fin = rp_fin.reset_index() -rp_fin['rp_id'] = rp_fin.index + 1 -rp_fin - - -# In[47]: - - -rp_fin['embargo_months'].value_counts() - - -# In[48]: - - -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna()] - - -# In[49]: - - -issn - - -# In[50]: - - -# merge pour avoir l'issnl -issn = pd.merge(issn, issnl, on='issn', how='left') -issn - - -# In[51]: - - -issn.loc[issn['issnl'].isna()] - - -# In[52]: - - -# merge dans l'autre sens pour garder que les lignes du fichier -rp_fin = pd.merge(rp_fin, issn[['id', 'journal', 'issnl']], on='issnl', how='left') -rp_fin - - -# In[53]: - - -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna() & rp_fin['id'].notna()] - - -# In[54]: - - -# garder les lignes avec merge -rp_fin_merge = rp_fin.loc[rp_fin['id'].notna()] -rp_fin_merge - - -# In[55]: - - -# supprimer les doublons et les vides -rp_fin_merge = rp_fin_merge.drop_duplicates(subset=['rp_id']) -rp_fin_merge - - -# In[56]: - - -# test des lignes sans journal -rp_fin_merge.loc[rp_fin_merge['journal'].isna()] - - -# In[57]: - - -# convertir l'index en id -del rp_fin_merge['id'] -del rp_fin_merge['index'] -del rp_fin_merge['rp_id'] -rp_fin_merge = 
rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge - - -# In[58]: - - -# convertir l'index en id -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge - - -# In[59]: - - -rp_fin_merge['embargo_months'].value_counts() - - -# In[60]: - - -# test des lignes sans embargo -rp_fin_merge.loc[rp_fin_merge['embargo_months'].isna()] - - -# In[61]: - - -# export excel -rp_fin_merge.to_excel('sample/read_publish_brut_merge.xlsx', index=False) - - -# In[62]: - - -# export csv -rp_fin_merge.to_csv('sample/read_publish_brut_merge.tsv', sep='\t', index=False) - diff --git a/import_scripts/10_oacct_terms.md b/import_scripts/10_oacct_terms.md deleted file mode 100644 index 9b95fd74..00000000 --- a/import_scripts/10_oacct_terms.md +++ /dev/null @@ -1,39541 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 08.09.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 -``` - -## Import du fichier extrait de Sherpa - - -```python -sherpa = pd.read_csv('sample/sherpa_policies_brut.tsv', encoding='utf-8', header=0, sep='\t') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
- - - - -```python -# check the distribution of article_version values -sherpa['article_version'].value_counts() -``` - - - - - published 4688 - accepted 3251 - submitted 656 - Name: article_version, dtype: int64 - - - - -```python -# list the rows that have no ISSN -sherpa.loc[sherpa['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
-
- - - - -```python -# load the ISSN to ISSN-L mapping table (tab-separated, header on the first row) -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# rename the mapping columns to the lowercase names issn / issnl -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnl
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# left-join the ISSN-L mapping onto sherpa by issn; rows without a match keep NaN in issnl -sherpa = pd.merge(sherpa, issns, on='issn', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnl
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-2815
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-2815
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-2815
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-2815
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-4842
.............................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-9953
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-9953
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-9953
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-9953
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-9953
-

8595 rows × 30 columns

-
- - - - -```python -# list the sherpa rows whose issn found no ISSN-L in the mapping -sherpa.loc[sherpa['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnl
-
# Extract the IR archiving + embargo data per ISSN.
# NOTE(review): only 'issnl' is selected, although the stated intent also
# covers archiving and embargo -- the column list looks unfinished; confirm.
sherpa_ir = sherpa[['issnl', ]]

# --- Import of the Read & Publish licences file ---
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. With the default chunked parsing this file
# triggered "DtypeWarning: Columns (0,1,3,4) have mixed types".
rp = pd.read_csv(
    'sample/read_publish_brut_merge.tsv',
    encoding='utf-8',
    header=0,
    sep='\t',
    low_memory=False,
)
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083
-

40083 rows × 18 columns

-
- - - - -```python -# count the rows per embargo_months value -rp['embargo_months'].value_counts() -``` - - - - - 0 39163 - 60 920 - Name: embargo_months, dtype: int64 - - - - -```python -# collapse the per-publisher 'x' flag columns into a single rp_publisher column; the assignments run in order, so a later publisher overwrites an earlier one on a row flagged for both -# rp.loc[rp['Elsevier'] == 'x', 'public_notes'] = 'Elsevier Read & Publish agreement' -rp.loc[rp['Elsevier'] == 'x', 'rp_publisher'] = 'Elsevier' -rp.loc[rp['Springer Nature'] == 'x', 'rp_publisher'] = 'Springer Nature' -rp.loc[rp['Wiley'] == 'x', 'rp_publisher'] = 'Wiley' -rp.loc[rp['TF'] == 'x', 'rp_publisher'] = 'TF' -rp.loc[rp['CUP'] == 'x', 'rp_publisher'] = 'CUP' -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_idrp_publisher
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
............................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 19 columns

-
- - - - -```python -# check the number of rows per publisher -rp['rp_publisher'].value_counts() -``` - - - - - Elsevier 18128 - Wiley 13905 - Springer Nature 6716 - CUP 920 - TF 414 - Name: rp_publisher, dtype: int64 - - - - -```python -# check the number of rows per license -rp['license'].value_counts() -``` - - - - - cc_by 17701 - cc_by_nc_nd 13929 - cc_by_nc 8223 - cc_by_nc_sa 230 - Name: license, dtype: int64 - - - - -```python -# drop the columns that are no longer needed (per-publisher flags and URL); no renaming happens in this cell -del rp['Elsevier'] -del rp['Springer Nature'] -del rp['Wiley'] -del rp['TF'] -del rp['CUP'] -del rp['URL'] -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnTitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_idrp_publisher
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
..........................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 13 columns

-
- - - - -```python -# rename the columns to lowercase names -# NOTE(review): the frame displayed before this cell has no 'read_publish_id' column (the id column is already named rp_id), so that mapping entry appears to be a no-op -- confirm -rp = rp.rename(columns = {'Title' : 'title', 'ROR' : 'ror', 'read_publish_id' : 'rp_id'}) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisher
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
..........................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 13 columns

-
# --- Table applicable_version ---
# Build the lookup table of article versions: submitted, accepted, published.
# The rows are handed to the DataFrame constructor in one go; the previous
# row-by-row DataFrame.append calls are deprecated since pandas 1.4 and
# removed in pandas 2.0.
col_names = ['id',
             'type',
             'description'
             ]
# 3 values : published, accepted, submitted
new_row1 = {'id': 1, 'type': 'submitted', 'description': 'Submitted version'}
new_row2 = {'id': 2, 'type': 'accepted', 'description': 'Accepted version'}
new_row3 = {'id': 3, 'type': 'published', 'description': 'Published version'}
applicable_version = pd.DataFrame(
    [new_row1, new_row2, new_row3],
    columns=col_names,
)
applicable_version
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtypedescription
01submittedSubmitted version
12acceptedAccepted version
23publishedPublished version
-
# Add the UNKNOWN entry (id 999999) at the end of the table.
# pd.concat with ignore_index=True replaces DataFrame.append, which is
# deprecated since pandas 1.4 and removed in pandas 2.0.
applicable_version = pd.concat(
    [
        applicable_version,
        pd.DataFrame([{'id': 999999, 'type': 'UNKNOWN', 'description': 'UNKNOWN'}]),
    ],
    ignore_index=True,
)
applicable_version
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtypedescription
01submittedSubmitted version
12acceptedAccepted version
23publishedPublished version
3999999UNKNOWNUNKNOWN
-
- - - - -```python -# select the columns to export (id and description) -applicable_version_export = applicable_version[['id', 'description']] -``` - - -```python -# export the applicable_version table as JSON: serialise to records, re-parse, then dump with a 2-space indent and without ASCII escaping -result = applicable_version_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/version.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export as a tab-separated file -applicable_version_export.to_csv('sample/version.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export as an Excel file -applicable_version_export.to_excel('sample/version.xlsx', index=False) -``` - - -```python -# left-join the version ids onto sherpa, matching article_version against type -# NOTE(review): both frames carry an 'id' column, so the result holds id_x (sherpa) and id_y (version) plus a redundant 'type' column, as the displayed result shows -sherpa = pd.merge(sherpa, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid_xissnlid_ytype
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151submitted
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152accepted
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153published
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153published
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421submitted
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531submitted
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532accepted
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533published
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533published
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533published
-

8595 rows × 32 columns

-
- - - - -```python -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'version'}) -del sherpa['type'] -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversion
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421
................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533
-

8595 rows × 31 columns

-
- - - - -```python -# merge avec la table read & publish -rp = pd.merge(rp, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisheridtype
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3published
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3published
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3published
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3published
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3published
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3published
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3published
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3published
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3published
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3published
-

40083 rows × 15 columns

-
- - - - -```python -rp = rp.rename(columns = {'id' : 'version'}) -del rp['type'] -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversion
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3
.............................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3
-

40083 rows × 14 columns

-
- - - -## Table oa_licence - - -```python -# creation du DF -# 'version' n'est pas utilisée, on dédoublonne par nom sans la version -col_names = ['id', - 'name', - 'url' - ] -oa_licence = pd.DataFrame(columns = col_names) -oa_licence -``` - - - - -
- - - - - - - - - - - - -
idnameurl
-
- - - - -```python -# export des licences -sherpa['license'].value_counts() -``` - - - - - cc_by 4151 - cc_by_nc_nd 2338 - cc_by_nc 559 - bespoke_license 47 - cc_by_nc_sa 20 - cc_by_nd 7 - cc_by_sa 4 - cc0 3 - all_rights_reserved 1 - Name: license, dtype: int64 - - - - -```python -sherpa_licences = sherpa['license'].drop_duplicates() -sherpa_licences = sherpa_licences.dropna() -sherpa_licences -``` - - - - - 2 cc_by - 3 cc_by_nc_nd - 8 bespoke_license - 29 cc_by_nc - 425 cc_by_nc_sa - 443 all_rights_reserved - 2147 cc_by_sa - 2148 cc_by_nd - 8420 cc0 - Name: license, dtype: object - - - - -```python -oa_licence['sherpa_code'] = np.nan -oa_licence -``` - - - - -
- - - - - - - - - - - - - -
idnameurlsherpa_code
-
- - - - -```python -for code in sherpa_licences: - print (code) - oa_licence = oa_licence.append({'sherpa_code' : code}, ignore_index=True) -``` - - cc_by - cc_by_nc_nd - bespoke_license - cc_by_nc - cc_by_nc_sa - all_rights_reserved - cc_by_sa - cc_by_nd - cc0 - - - -```python -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
0NaNNaNNaNcc_by
1NaNNaNNaNcc_by_nc_nd
2NaNNaNNaNbespoke_license
3NaNNaNNaNcc_by_nc
4NaNNaNNaNcc_by_nc_sa
5NaNNaNNaNall_rights_reserved
6NaNNaNNaNcc_by_sa
7NaNNaNNaNcc_by_nd
8NaNNaNNaNcc0
-
- - - - -```python -# convertir l'index en id -oa_licence = oa_licence.reset_index() -# ajout de l'id avec l'index + 1 -oa_licence['id'] = oa_licence['index'] + 1 -del oa_licence['index'] -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01NaNNaNcc_by
12NaNNaNcc_by_nc_nd
23NaNNaNbespoke_license
34NaNNaNcc_by_nc
45NaNNaNcc_by_nc_sa
56NaNNaNall_rights_reserved
67NaNNaNcc_by_sa
78NaNNaNcc_by_nd
89NaNNaNcc0
-
- - - - -```python -# ajout du nom et des URLs -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'name'] = 'CC BY' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'url'] = 'https://creativecommons.org/licenses/by/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'name'] = 'CC BY-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'url'] = 'https://creativecommons.org/licenses/by-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'name'] = 'CC BY-NC' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'url'] = 'https://creativecommons.org/licenses/by-nc/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'name'] = 'CC BY-NC-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'url'] = 'https://creativecommons.org/licenses/by-nc-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'name'] = 'CC BY-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'url'] = 'https://creativecommons.org/licenses/by-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'name'] = 'CC BY-NC-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'url'] = 'https://creativecommons.org/licenses/by-nc-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'name'] = 'CC0' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'url'] = 'https://creativecommons.org/publicdomain/zero/1.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'name'] = 'Specific license' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'name'] = 'All rights reserved' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'name'] = 'GNU GPL' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'url'] = 'http://gnugpl.org/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'name'] = 
'Public domain' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'url'] = 'https://creativecommons.org/share-your-work/public-domain/' -# oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = 'https://port.sas.ac.uk/mod/book/view.php?id=1340&chapterid=1003' -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01CC BYhttps://creativecommons.org/licenses/by/4.0/cc_by
12CC BY-NC-NDhttps://creativecommons.org/licenses/by-nc-nd/...cc_by_nc_nd
23Specific licensebespoke_license
34CC BY-NChttps://creativecommons.org/licenses/by-nc/4.0/cc_by_nc
45CC BY-NC-SAhttps://creativecommons.org/licenses/by-nc-sa/...cc_by_nc_sa
56All rights reservedall_rights_reserved
67CC BY-SAhttps://creativecommons.org/licenses/by-sa/4.0/cc_by_sa
78CC BY-NDhttps://creativecommons.org/licenses/by-nd/4.0/cc_by_nd
89CC0https://creativecommons.org/publicdomain/zero/...cc0
-
- - - - -```python -# ajout de la valeur UNKNOWN -oa_licence = oa_licence.append({'id' : 999999, 'sherpa_code' : '___', 'name' : 'UNKNOWN', 'url' : ''}, ignore_index=True) -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01CC BYhttps://creativecommons.org/licenses/by/4.0/cc_by
12CC BY-NC-NDhttps://creativecommons.org/licenses/by-nc-nd/...cc_by_nc_nd
23Specific licensebespoke_license
34CC BY-NChttps://creativecommons.org/licenses/by-nc/4.0/cc_by_nc
45CC BY-NC-SAhttps://creativecommons.org/licenses/by-nc-sa/...cc_by_nc_sa
56All rights reservedall_rights_reserved
67CC BY-SAhttps://creativecommons.org/licenses/by-sa/4.0/cc_by_sa
78CC BY-NDhttps://creativecommons.org/licenses/by-nd/4.0/cc_by_nd
89CC0https://creativecommons.org/publicdomain/zero/...cc0
9999999UNKNOWN___
-
- - - - -```python -# ajout aux tables sherpa et rp -sherpa = sherpa.rename(columns = {'license' : 'sherpa_code'}) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversion
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421
................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533
-

8595 rows × 31 columns

-
- - - - -```python -# ajout aux tables sherpa et rp -rp = rp.rename(columns = {'license' : 'sherpa_code'}) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversion
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3
.............................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3
-

40083 rows × 14 columns

-
- - - - -```python -# merge -sherpa = pd.merge(sherpa, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid_xissnlversionid_y
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0
-

8595 rows × 32 columns

-
- - - - -```python -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'licence'}) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicence
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0
-

8595 rows × 32 columns

-
# Look up each rp row's sherpa_code in the oa_licence table and bring back the
# matching licence id.  The left join keeps every rp row, including those
# whose code has no entry in oa_licence.
rp = rp.merge(
    oa_licence[['sherpa_code', 'id']],
    on='sherpa_code',
    how='left',
)
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionid
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35
-

40083 rows × 15 columns

-
# The id column holds the licence key merged in from oa_licence; give it its
# final name.
rp = rp.rename(columns={'id': 'licence'})
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicence
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35
-

40083 rows × 15 columns

-
# Rename the licence fields to their final export names.
oa_licence_export = oa_licence[['id', 'name', 'url']]
oa_licence_export = oa_licence_export.rename(
    columns={'name': 'name_or_abbrev', 'url': 'website'}
)

# Export the oa_licence table as JSON.  The records are round-tripped through
# json.loads / json.dump so the file is written indented and without ASCII
# escaping of non-ASCII characters.
result = oa_licence_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/licence.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)

# Export as tab-separated text.
oa_licence_export.to_csv('sample/licence.tsv', sep='\t', encoding='utf-8', index=False)

# Export as an Excel workbook.
oa_licence_export.to_excel('sample/licence.xlsx', index=False)

# ## Table cost_factor_type

# Create the table in a single constructor call.  The original built it row by
# row with DataFrame.append, which was deprecated in pandas 1.4 and removed in
# pandas 2.0.
col_names = ['id', 'name']
cost_factor_type = pd.DataFrame(
    [
        {'id': 1, 'name': 'APC'},
        {'id': 2, 'name': 'Discount'},
        {'id': 3, 'name': 'Refund'},
    ],
    columns=col_names,
)
cost_factor_type
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idname
01APC
12Discount
23Refund
-
# Add the UNKNOWN fallback value (id 999999) at the end of the table.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# ignore_index=True renumbers the rows exactly as append(..., ignore_index=True)
# did.
cost_factor_type = pd.concat(
    [cost_factor_type, pd.DataFrame([{'id': 999999, 'name': 'UNKNOWN'}])],
    ignore_index=True,
)
cost_factor_type
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idname
01APC
12Discount
23Refund
3999999UNKNOWN
-
# Export the cost_factor_type table as indented, non-ASCII-escaped JSON.
result = cost_factor_type.to_json(force_ascii=False, orient='records')
parsed = json.loads(result)
with open('sample/cost_factor_type.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, ensure_ascii=False, indent=2)

# Export as tab-separated text.
cost_factor_type.to_csv(
    'sample/cost_factor_type.tsv',
    index=False,
    sep='\t',
    encoding='utf-8',
)

# Export as an Excel workbook.
cost_factor_type.to_excel('sample/cost_factor_type.xlsx', index=False)

# ## Table cost_factor

# ### Adding APC data from DOAJ

# Load the DOAJ journal dump; the first row of the CSV is the header.
doaj = pd.read_csv(
    'doaj/journalcsv__doaj_20210312_0636_utf8.csv',
    header=0,
    encoding='utf-8',
)
doaj
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisherSociety or institutionCountry of society or institutionJournal licenseLicense attributesURL for license termsMachine-readable CC licensing information embedded or displayed in articlesURL to an example page with embedded licensing informationAuthor holds copyright without restrictionsCopyright information URLReview processReview process information URLJournal plagiarism screening policyPlagiarism information URLURL for journal's aims & scopeURL for the Editorial Board pageURL for journal's instructions for authorsAverage number of weeks between article submission and publicationAPCAPC information URLAPC amountJournal waiver policy (for developing country authors etc)Waiver policy information URLHas other feesOther submission fees information URLPreservation ServicesPreservation Service: national libraryPreservation information URLDeposit policy directoryURL for deposit policyPersistent article identifiersArticle metadata includes ORCIDsJournal complies with I4OC standards for open citationsDoes this journal allow unrestricted reuse in compliance with BOAI?URL for journal's Open Access statementContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Added
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazilNaNNaNCC BYNaNhttp://www.scielo.br/revistas/aabc/iaboutj.htmYeshttp://www.scielo.br/scielo.php?script=sci_art...NoNaNPeer reviewhttp://www.scielo.br/revistas/aabc/iinstruc.htmYeshttp://www.scielo.br/revistas/aabc/iinstruc.htmhttp://www.scielo.br/revistas/aabc/iaboutj.htmhttp://www.scielo.br/revistas/aabc/iedboard.htmhttp://www.scielo.br/revistas/aabc/iinstruc.htm18Nohttp://www.scielo.br/revistas/aabc/iinstruc.htmNaNNoNaNNohttp://www.scielo.br/revistas/aabc/iinstruc.htmNaNNaNNaNNaNNaNDOINaNNaNYeshttp://www.scielo.br/revistas/aabc/isubscrp.htmNaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItalyNaNNaNCC BY-NC-NDNaNhttp://riviste.unimi.it/index.php/ACME/indexYeshttp://riviste.unimi.it/index.php/ACME/article...Yeshttp://riviste.unimi.it/index.php/ACME/about/e...Blind peer reviewhttps://riviste.unimi.it/index.php/ACME/aboutNoNaNhttps://riviste.unimi.it/index.php/ACME/abouthttps://riviste.unimi.it/index.php/ACME/about/...http://riviste.unimi.it/index.php/ACME/about/s...12Nohttps://riviste.unimi.it/index.php/Lebenswelt/...NaNNoNaNNohttps://riviste.unimi.it/index.php/Lebenswelt/...NaNItalian National Library (BNCF)http://www.depositolegale.it/NaNNaNDOI, NBNNaNNaNYeshttp://riviste.unimi.it/index.php/ACME/about/e...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...SwedenNaNNaNCC BY-NCNaNhttps://www.medicaljournals.se/acta/open-acces...NaNNaNNoNaNPeer reviewhttps://www.medicaljournals.se/acta/instructio...NoNaNhttp://www.medicaljournals.se/actahttps://www.medicaljournals.se/acta/editorshttps://www.medicaljournals.se/acta/instructio...20Yeshttps://www.medicaljournals.se/acta/instructio...1600 EURNoNaNYeshttps://www.medicaljournals.se/acta/instructio...NaNNaNhttp://www.ingentaconnect.com/publisher/claimi...Sherpa/RomeoNaNDOINaNNaNYeshttps://www.medicaljournals.se/acta/open-acces...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta RicaNaNNaNCC BY-NC-SANaNhttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNNohttp://actamedica.medicos.cr/index.php/Acta_Me...Double blind peer reviewhttp://actamedica.medicos.cr/index.php/Acta_Me...Yeshttp://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...12Nohttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNoNaNNoNaNPKP PNNaNhttp://actamedica.medicos.cr/index.php/Acta_Me...Sherpa/Romeohttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNoNoYeshttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPolandNaNNaNCC BYNaNhttps://pbsociety.org.pl/journals/index.php/am...Yeshttps://doi.org/10.5586/am.5511Yeshttps://pbsociety.org.pl/journals/index.php/am...Double blind peer reviewhttps://pbsociety.org.pl/journals/index.php/am...Yeshttps://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...16Yeshttps://pbsociety.org.pl/journals/index.php/am...400 EURNoNaNNoNaNNaNNaNNaNSherpa/Romeohttps://v2.sherpa.ac.uk/id/publication/25478DOIYesYesYeshttps://pbsociety.org.pl/journals/index.php/am...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z
..................................................................................................................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United StatesSuzhou Institute of Biomedical Engineering and...ChinaCC BYNaNhttps://spj.sciencemag.org/bmef/guidelines/#co...Yeshttps://spj.sciencemag.org/journals/bmef/2020/...Nohttps://spj.sciencemag.org/bmef/guidelines/#co...Blind peer reviewhttps://spj.sciencemag.org/bmef/peer-review-pr...Yeshttps://spj.sciencemag.org/bmef/publication-et...https://spj.sciencemag.org/bmef/about/#mission...https://spj.sciencemag.org/bmef/editors/https://spj.sciencemag.org/bmef/guidelines/16Nohttps://spj.sciencemag.org/bmef/apc/NaNYeshttps://spj.sciencemag.org/bmef/apc/NoNaNNaNNaNNaNNaNNaNDOIYesYesYeshttps://spj.sciencemag.org/bmef/about/NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited StatesNaNNaNCC BYNaNhttps://misinforeview.hks.harvard.edu/editoria...Yeshttps://misinforeview.hks.harvard.edu/article/...Yeshttps://misinforeview.hks.harvard.edu/editoria...Double blind peer reviewhttps://misinforeview.hks.harvard.edu/editoria...NoNaNhttps://misinforeview.hks.harvard.edu/our-miss...https://misinforeview.hks.harvard.edu/editoria...https://misinforeview.hks.harvard.edu/submit/10Nohttps://misinforeview.hks.harvard.edu/editoria...NaNNoNaNNoNaNNaNNaNNaNNaNNaNDOIYesNoYeshttps://misinforeview.hks.harvard.edu/editoria...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic ofNaNNaNCC BYNaNhttps://journal.ohrm.bba.md/index.php/journal-...Yeshttps://journal.ohrm.bba.md/index.php/journal-...Yeshttps://journal.ohrm.bba.md/index.php/journal-...Double blind peer reviewhttps://journal.ohrm.bba.md/index.php/journal-...NoNaNhttps://journal.ohrm.bba.md/index.php/journal-...https://journal.ohrm.bba.md/index.php/journal-...https://journal.ohrm.bba.md/index.php/journal-...10Nohttps://journal.ohrm.bba.md/index.php/journal-...NaNNoNaNNoNaNNaNNaNNaNNaNNaNDOI, UDCYesNoYeshttps://journal.ohrm.bba.md/index.php/journal-...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic ofNaNNaNCC BYNaNhttps://hrmj.ihu.ac.ir/journal/about?lang=enNaNNaNYeshttps://hrmj.ihu.ac.ir/journal/about?lang=enDouble blind peer reviewhttps://hrmj.ihu.ac.ir/journal/process?lang=enNoNaNhttps://hrmj.ihu.ac.ir/journal/aim_scope?lang=enhttps://hrmj.ihu.ac.ir/journal/editorial.board...https://hrmj.ihu.ac.ir/journal/authors.note?la...20Nohttps://hrmj.ihu.ac.ir/?lang=enNaNNoNaNNoNaNNaNNaNNaNNaNNaNNaNNoNoYeshttps://hrmj.ihu.ac.ir/?lang=enNaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited StatesTsunami Society InternationalNaNCC BYNaNhttp://tsunamisociety.org/InstructionsAuthors....NaNNaNNoNaNPeer reviewhttp://tsunamisociety.org/PeerReview.htmlNoNaNhttp://tsunamisociety.org/AboutUs.htmlhttp://tsunamisociety.org/EditorialBoard.htmlhttp://tsunamisociety.org/InstructionsAuthors....12Nohttp://tsunamisociety.org/InstructionsAuthors....NaNNoNaNYeshttp://tsunamisociety.org/InstructionsAuthors....NaNNaNNaNNaNNaNNaNNaNNaNYeshttp://tsunamisociety.org/AboutUs.htmlNaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z
-

16029 rows × 53 columns

-
# Keep only the journals flagged as charging an APC, restricted to the two
# ISSN columns and the fee.
doaj_apc = doaj.loc[
    doaj['APC'] == 'Yes',
    ['Journal ISSN (print version)', 'Journal EISSN (online version)', 'APC amount'],
]
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
20001-55551651-20571600 EUR
40001-625X2353-074X400 EUR
50001-69181873-62971500 USD
60001-69772083-9480520 EUR
110003-10622327-97883500 USD
............
16002NaN2722-1253200 USD
16004NaN2722-723535 USD
160052722-96882722-9696500000 IDR
16007NaN2723-1097100000 IDR
160222765-01892765-0235700 USD
-

4462 rows × 3 columns

-
# Keep only the journals flagged as NOT charging an APC, restricted to the two
# ISSN columns.
# A new column is assigned on this frame right afterwards, so take an explicit
# copy: the original chained indexing (.loc[mask][cols]) returns a frame that
# pandas may treat as a slice of doaj, which can raise SettingWithCopyWarning
# on that assignment.
doaj_apc_no = doaj.loc[
    doaj['APC'] == 'No',
    ['Journal ISSN (print version)', 'Journal EISSN (online version)'],
].copy()
doaj_apc_no
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)
00001-37651678-2690
10001-494X2282-0035
30001-60122215-5856
70001-70191846-0410
80002-03971868-6869
.........
16024NaN2765-8031
16025NaN2766-1652
160262887-34582587-3466
160278254-80022645-5072
160288755-6839NaN
-

11567 rows × 2 columns

-
# Journals without an APC get a fee of 0 (integer, not a "<value> <currency>"
# string like the rows that do charge one).
doaj_apc_no['APC amount'] = 0
doaj_apc_no
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
00001-37651678-26900
10001-494X2282-00350
30001-60122215-58560
70001-70191846-04100
80002-03971868-68690
............
16024NaN2765-80310
16025NaN2766-16520
160262887-34582587-34660
160278254-80022645-50720
160288755-6839NaN0
-

11567 rows × 3 columns

-
# Stack the no-APC journals under the APC journals and renumber the rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the resulting frame is the same.
doaj_apc = pd.concat([doaj_apc, doaj_apc_no], ignore_index=True)
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
00001-55551651-20571600 EUR
10001-625X2353-074X400 EUR
20001-69181873-62971500 USD
30001-69772083-9480520 EUR
40003-10622327-97883500 USD
............
16024NaN2765-80310
16025NaN2766-16520
160262887-34582587-34660
160278254-80022645-50720
160288755-6839NaN0
-

16029 rows × 3 columns

-
# Split the price at its first space into 'amount' and 'symbol' (currency).
# Rows whose 'APC amount' is the integer 0 are not strings, so both new columns
# come out missing for them; they are filled in by the following step.
doaj_apc[['amount', 'symbol']] = (
    doaj_apc['APC amount']
    .str.split(' ', n=1, expand=True)
)
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbol
00001-55551651-20571600 EUR1600EUR
10001-625X2353-074X400 EUR400EUR
20001-69181873-62971500 USD1500USD
30001-69772083-9480520 EUR520EUR
40003-10622327-97883500 USD3500USD
..................
16024NaN2765-80310NaNNaN
16025NaN2766-16520NaNNaN
160262887-34582587-34660NaNNaN
160278254-80022645-50720NaNNaN
160288755-6839NaN0NaNNaN
-

16029 rows × 5 columns

-
# Rows with an integer 0 fee (journals without APC) could not be split into
# amount and currency; give them an amount of 0 and an empty currency symbol.
no_fee_rows = doaj_apc['APC amount'] == 0
doaj_apc.loc[no_fee_rows, 'amount'] = 0
doaj_apc.loc[no_fee_rows, 'symbol'] = ''
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbol
00001-55551651-20571600 EUR1600EUR
10001-625X2353-074X400 EUR400EUR
20001-69181873-62971500 USD1500USD
30001-69772083-9480520 EUR520EUR
40003-10622327-97883500 USD3500USD
..................
16024NaN2765-803100
16025NaN2766-165200
160262887-34582587-346600
160278254-80022645-507200
160288755-6839NaN00
-

16029 rows × 5 columns

-
# Add the remaining cost_factor fields: every DOAJ row is of type 1 (the id
# given to 'APC' in the cost_factor_type table) and is tagged with its source.
doaj_apc['comment'] = 'Source: DOAJ'
doaj_apc['cost_factor_type'] = 1
# Restore the column order used downstream: type first, then comment.
doaj_apc = doaj_apc[
    [c for c in doaj_apc.columns if c not in ('cost_factor_type', 'comment')]
    + ['cost_factor_type', 'comment']
]
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbolcost_factor_typecomment
00001-55551651-20571600 EUR1600EUR1Source: DOAJ
10001-625X2353-074X400 EUR400EUR1Source: DOAJ
20001-69181873-62971500 USD1500USD1Source: DOAJ
30001-69772083-9480520 EUR520EUR1Source: DOAJ
40003-10622327-97883500 USD3500USD1Source: DOAJ
........................
16024NaN2765-8031001Source: DOAJ
16025NaN2766-1652001Source: DOAJ
160262887-34582587-3466001Source: DOAJ
160278254-80022645-5072001Source: DOAJ
160288755-6839NaN001Source: DOAJ
-

16029 rows × 7 columns

-
# Replace the verbose DOAJ ISSN headers with short column names.
doaj_apc = doaj_apc.rename(
    columns={
        'Journal ISSN (print version)': 'issn_print',
        'Journal EISSN (online version)': 'issn_electronic',
    }
)
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecomment
00001-55551651-20571600 EUR1600EUR1Source: DOAJ
10001-625X2353-074X400 EUR400EUR1Source: DOAJ
20001-69181873-62971500 USD1500USD1Source: DOAJ
30001-69772083-9480520 EUR520EUR1Source: DOAJ
40003-10622327-97883500 USD3500USD1Source: DOAJ
........................
16024NaN2765-8031001Source: DOAJ
16025NaN2766-1652001Source: DOAJ
160262887-34582587-3466001Source: DOAJ
160278254-80022645-5072001Source: DOAJ
160288755-6839NaN001Source: DOAJ
-

16029 rows × 7 columns

-
# Start the 'issn' column from the electronic ISSN; rows without one stay
# missing at this point.
doaj_apc['issn'] = doaj_apc['issn_electronic']
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-2057
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-6297
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-9480
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-9788
...........................
16024NaN2765-8031001Source: DOAJ2765-8031
16025NaN2766-1652001Source: DOAJ2766-1652
160262887-34582587-3466001Source: DOAJ2587-3466
160278254-80022645-5072001Source: DOAJ2645-5072
160288755-6839NaN001Source: DOAJNaN
-

16029 rows × 8 columns

-
# Show the rows that still have no 'issn' (no electronic ISSN available).
doaj_apc[doaj_apc['issn'].isna()]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
120013-9998NaN350 EUR350EUR1Source: DOAJNaN
140015-4040NaN747 USD747USD1Source: DOAJNaN
170017-0011NaN400 EUR400EUR1Source: DOAJNaN
290026-1165NaN220000 JPY220000JPY1Source: DOAJNaN
300026-279XNaN350 USD350USD1Source: DOAJNaN
...........................
158672676-5357NaN001Source: DOAJNaN
158922686-9594NaN001Source: DOAJNaN
159372701-1569NaN001Source: DOAJNaN
159742709-8370NaN001Source: DOAJNaN
160288755-6839NaN001Source: DOAJNaN
-

1461 rows × 8 columns

-
# Where 'issn' is empty, fall back to the print ISSN.
missing_issn = doaj_apc['issn'].isna()
doaj_apc.loc[missing_issn, 'issn'] = doaj_apc['issn_print']
# Check: list the rows that still have no 'issn' after the fallback.
doaj_apc[doaj_apc['issn'].isna()]
- - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
-
# Join the issns table on 'issn'.  The left join keeps every DOAJ row, even
# when its ISSN has no match in issns.
doaj_apc = doaj_apc.merge(issns, how='left', on='issn')
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissnissnl
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-20570001-5555
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X0001-625X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-62970001-6918
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-94800001-6977
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-97880003-1062
..............................
16024NaN2765-8031001Source: DOAJ2765-8031NaN
16025NaN2766-1652001Source: DOAJ2766-1652NaN
160262887-34582587-3466001Source: DOAJ2587-3466NaN
160278254-80022645-5072001Source: DOAJ2645-5072NaN
160288755-6839NaN001Source: DOAJ8755-68398755-6839
-

16029 rows × 9 columns

-
# Rename the linking-ISSN column to its final name.
doaj_apc = doaj_apc.rename(columns={'issnl': 'issn_link'})
doaj_apc
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissnissn_link
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-20570001-5555
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X0001-625X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-62970001-6918
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-94800001-6977
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-97880003-1062
..............................
16024NaN2765-8031001Source: DOAJ2765-8031NaN
16025NaN2766-1652001Source: DOAJ2766-1652NaN
160262887-34582587-3466001Source: DOAJ2587-3466NaN
160278254-80022645-5072001Source: DOAJ2645-5072NaN
160288755-6839NaN001Source: DOAJ8755-68398755-6839
-

16029 rows × 9 columns

-
# ### Adding APCs from the Journal Database (Zurich Open Repository and Archive)
#
# https://www.jdb.uzh.ch/

# Load the Zurich JDB APC extract: tab-separated, first row is the header.
jdb = pd.read_csv(
    'zora/jdb_apcs.tsv',
    sep='\t',
    header=0,
    encoding='utf-8',
)
jdb
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_date
0100011662-51611662-51611662-51612490USD2018
1100011662-51611662-51611662-51612950USD2020
2100020952-33831467-85780952-33832500EUR2017
3100051179-72581179-72581179-72581958USD2018
4100051179-72581179-72581179-72581958USD2020
........................
1157599861549-96341549-96421549-96343000USD2015
1157699861549-96341549-96421549-96343550USD2016
1157799861549-96341549-96421549-96343550USD2017
1157899861549-96341549-96421549-96343750USD2018
1157999950816-46491465-33030816-46492950USD2017
-

11580 rows × 7 columns

-
- - - - -```python -# renommer l'id -jdb = jdb.rename(columns = {'id' : 'jdb_id'}) -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_date
0100011662-51611662-51611662-51612490USD2018
1100011662-51611662-51611662-51612950USD2020
2100020952-33831467-85780952-33832500EUR2017
3100051179-72581179-72581179-72581958USD2018
4100051179-72581179-72581179-72581958USD2020
........................
1157599861549-96341549-96421549-96343000USD2015
1157699861549-96341549-96421549-96343550USD2016
1157799861549-96341549-96421549-96343550USD2017
1157899861549-96341549-96421549-96343750USD2018
1157999950816-46491465-33030816-46492950USD2017
-

11580 rows × 7 columns

-
- - - - -```python -# ajouter les champs manquants -jdb['cost_factor_type'] = 1 -jdb['comment'] = 'Source: JDB (' + jdb['apc_date'].astype(str) + ')' -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_datecost_factor_typecomment
0100011662-51611662-51611662-51612490USD20181Source: JDB (2018)
1100011662-51611662-51611662-51612950USD20201Source: JDB (2020)
2100020952-33831467-85780952-33832500EUR20171Source: JDB (2017)
3100051179-72581179-72581179-72581958USD20181Source: JDB (2018)
4100051179-72581179-72581179-72581958USD20201Source: JDB (2020)
..............................
1157599861549-96341549-96421549-96343000USD20151Source: JDB (2015)
1157699861549-96341549-96421549-96343550USD20161Source: JDB (2016)
1157799861549-96341549-96421549-96343550USD20171Source: JDB (2017)
1157899861549-96341549-96421549-96343750USD20181Source: JDB (2018)
1157999950816-46491465-33030816-46492950USD20171Source: JDB (2017)
-

11580 rows × 9 columns

-
- - - - -```python -# renommer les champs -jdb = jdb.rename(columns = {'apc_fee' : 'amount', 'apc_currency' : 'symbol'}) -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkamountsymbolapc_datecost_factor_typecomment
0100011662-51611662-51611662-51612490USD20181Source: JDB (2018)
1100011662-51611662-51611662-51612950USD20201Source: JDB (2020)
2100020952-33831467-85780952-33832500EUR20171Source: JDB (2017)
3100051179-72581179-72581179-72581958USD20181Source: JDB (2018)
4100051179-72581179-72581179-72581958USD20201Source: JDB (2020)
..............................
1157599861549-96341549-96421549-96343000USD20151Source: JDB (2015)
1157699861549-96341549-96421549-96343550USD20161Source: JDB (2016)
1157799861549-96341549-96421549-96343550USD20171Source: JDB (2017)
1157899861549-96341549-96421549-96343750USD20181Source: JDB (2018)
1157999950816-46491465-33030816-46492950USD20171Source: JDB (2017)
-

11580 rows × 9 columns

-
- - - - -```python -jdb = jdb.drop_duplicates(subset='jdb_id', keep='last') -``` - - -```python -# import openapc avec les valeurs max -openapc = pd.read_csv('openapc/open_apc_max.tsv', encoding='utf-8', header=0, sep='\t') -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
periodeuroissnissn_printissn_electronicissn_l
020181385.360001-07820001-0782NaN0001-0782
120181811.880001-14520001-14521533-385X0001-1452
220201826.490001-14520001-14521533-385X0001-1452
320132238.760001-1541NaNNaN0001-1541
420141887.860001-1541NaNNaN0001-1541
.....................
2379320132400.008756-7938NaNNaN1520-6033
2379420141822.498756-7938NaNNaN1520-6033
2379520161762.698756-7938NaNNaN1520-6033
2379620173248.318756-7938NaNNaN1520-6033
2379720192913.118756-7938NaNNaN1520-6033
-

23798 rows × 6 columns

-
- - - - -```python -# renommer les champs -openapc = openapc.rename(columns = {'period' : 'apc_date', 'issn_l' : 'issn_link', 'euro' : 'amount'}) -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
apc_dateamountissnissn_printissn_electronicissn_link
020181385.360001-07820001-0782NaN0001-0782
120181811.880001-14520001-14521533-385X0001-1452
220201826.490001-14520001-14521533-385X0001-1452
320132238.760001-1541NaNNaN0001-1541
420141887.860001-1541NaNNaN0001-1541
.....................
2379320132400.008756-7938NaNNaN1520-6033
2379420141822.498756-7938NaNNaN1520-6033
2379520161762.698756-7938NaNNaN1520-6033
2379620173248.318756-7938NaNNaN1520-6033
2379720192913.118756-7938NaNNaN1520-6033
-

23798 rows × 6 columns

-
- - - - -```python -# ajouter le lien avec le type et le symbole -openapc['cost_factor_type'] = 1 -openapc['jdb_id'] = np.nan -openapc['symbol'] = 'EUR' -openapc['comment'] = 'Source: OpenAPC (' + openapc['apc_date'].astype(str) + ')' -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
apc_dateamountissnissn_printissn_electronicissn_linkcost_factor_typejdb_idsymbolcomment
020181385.360001-07820001-0782NaN0001-07821NaNEURSource: OpenAPC (2018)
120181811.880001-14520001-14521533-385X0001-14521NaNEURSource: OpenAPC (2018)
220201826.490001-14520001-14521533-385X0001-14521NaNEURSource: OpenAPC (2020)
320132238.760001-1541NaNNaN0001-15411NaNEURSource: OpenAPC (2013)
420141887.860001-1541NaNNaN0001-15411NaNEURSource: OpenAPC (2014)
.................................
2379320132400.008756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2013)
2379420141822.498756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2014)
2379520161762.698756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2016)
2379620173248.318756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2017)
2379720192913.118756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2019)
-

23798 rows × 10 columns

-
- - - - -```python -# ajout des lignes de openapc -jdb = jdb.append(openapc, ignore_index=True) -jdb -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py:7123: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version - of pandas will change to not sort by default. - - To accept the future behavior, pass 'sort=False'. - - To retain the current behavior and silence the warning, pass 'sort=True'. - - sort=sort, - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
amountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
02950.002020Source: JDB (2020)1NaN1662-51611662-51611662-516110001.0USD
12500.002017Source: JDB (2017)1NaN1467-85780952-33830952-338310002.0EUR
21958.002020Source: JDB (2020)1NaN1179-72581179-72581179-725810005.0USD
31370.002020Source: JDB (2020)1NaN1479-58761479-5876NaN10015.0GBP
42200.002017Source: JDB (2017)1NaN1572-85521383-49241383-492410023.0EUR
.................................
299472400.002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
299481822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
299491762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
299503248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
299512913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

29952 rows × 10 columns

-
- - - - -```python -# supprimer les doublons par issnl et date -jdb = jdb.drop_duplicates(subset=['issn_link', 'apc_date'], keep='first') -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
amountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
02950.002020Source: JDB (2020)1NaN1662-51611662-51611662-516110001.0USD
12500.002017Source: JDB (2017)1NaN1467-85780952-33830952-338310002.0EUR
21958.002020Source: JDB (2020)1NaN1179-72581179-72581179-725810005.0USD
31370.002020Source: JDB (2020)1NaN1479-58761479-5876NaN10015.0GBP
42200.002017Source: JDB (2017)1NaN1572-85521383-49241383-492410023.0EUR
.................................
299472400.002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
299481822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
299491762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
299503248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
299512913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

29478 rows × 10 columns

-
- - - - -```python -# ajout de DOAJ -cost_factor = doaj_apc.append(jdb, ignore_index=True) -cost_factor -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
01600 EUR1600NaNSource: DOAJ11651-20571651-20570001-55550001-5555NaNEUR
1400 EUR400NaNSource: DOAJ12353-074X2353-074X0001-625X0001-625XNaNEUR
21500 USD1500NaNSource: DOAJ11873-62971873-62970001-69180001-6918NaNUSD
3520 EUR520NaNSource: DOAJ12083-94802083-94800001-69770001-6977NaNEUR
43500 USD3500NaNSource: DOAJ12327-97882327-97880003-10620003-1062NaNUSD
....................................
45502NaN24002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
45503NaN1822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
45504NaN1762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
45505NaN3248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
45506NaN2913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

45507 rows × 11 columns

-
- - - - -```python -# test issnl -cost_factor.loc[cost_factor['issn_link'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
13540 PLN540NaNSource: DOAJ12544-85522544-8552NaN0014-8261NaNPLN
62100 USD100NaNSource: DOAJ12545-31492545-3149NaN0079-4252NaNUSD
129423 EUR423NaNSource: DOAJ12605-33222605-3322NaN0212-9426NaNEUR
133200 EUR200NaNSource: DOAJ12603-59872603-5987NaN0214-9877NaNEUR
140800000 IDR800000NaNSource: DOAJ12621-11222621-1122NaN0216-3438NaNIDR
....................................
26703NaN3873.612016Source: OpenAPC (2016)10263-8762NaNNaN0263-8762NaNEUR
26704NaN2557.732017Source: OpenAPC (2017)10263-8762NaNNaN0263-8762NaNEUR
26705NaN3564.252018Source: OpenAPC (2018)10263-8762NaNNaN0263-8762NaNEUR
27923NaN1130.52019Source: OpenAPC (2019)10342-183XNaNNaN0342-183XNaNEUR
45474NaN16902020Source: OpenAPC (2020)12691-9478NaNNaNNaNNaNEUR
-

2500 rows × 11 columns

-
- - - - -```python -# merge avec issnl -cost_factor = pd.merge(cost_factor, issns, on='issn', how='left') -cost_factor -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
01600 EUR1600NaNSource: DOAJ11651-20571651-20570001-55550001-5555NaNEUR0001-5555
1400 EUR400NaNSource: DOAJ12353-074X2353-074X0001-625X0001-625XNaNEUR0001-625X
21500 USD1500NaNSource: DOAJ11873-62971873-62970001-69180001-6918NaNUSD0001-6918
3520 EUR520NaNSource: DOAJ12083-94802083-94800001-69770001-6977NaNEUR0001-6977
43500 USD3500NaNSource: DOAJ12327-97882327-97880003-10620003-1062NaNUSD0003-1062
.......................................
45502NaN24002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45503NaN1822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45504NaN1762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45505NaN3248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45506NaN2913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR1520-6033
-

45507 rows × 12 columns

-
- - - - -```python -# test issnl -cost_factor.loc[cost_factor['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
13540 PLN540NaNSource: DOAJ12544-85522544-8552NaN0014-8261NaNPLNNaN
62100 USD100NaNSource: DOAJ12545-31492545-3149NaN0079-4252NaNUSDNaN
129423 EUR423NaNSource: DOAJ12605-33222605-3322NaN0212-9426NaNEURNaN
133200 EUR200NaNSource: DOAJ12603-59872603-5987NaN0214-9877NaNEURNaN
140800000 IDR800000NaNSource: DOAJ12621-11222621-1122NaN0216-3438NaNIDRNaN
.......................................
45472NaN698.652019Source: OpenAPC (2019)12690-00092690-00092690-0009NaNNaNEURNaN
45473NaN754.672019Source: OpenAPC (2019)12690-3202NaN2690-3202NaNNaNEURNaN
45474NaN16902020Source: OpenAPC (2020)12691-9478NaNNaNNaNNaNEURNaN
45475NaN1523.22020Source: OpenAPC (2020)12699-00162699-00162699-0016NaNNaNEURNaN
45476NaN3052020Source: OpenAPC (2020)12704-61922704-61922280-18552280-1855NaNEURNaN
-

8935 rows × 12 columns

-
- - - - -```python -#ajout des issn quand ça manque -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
-
- - - - -```python -#ajout des issnl quand ça manque -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn'] -cost_factor.loc[cost_factor['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
-
- - - - -```python -# prendre les ids pour le merge -cost_factor_ids = cost_factor[['issn', 'issnl', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_1 = cost_factor_ids_1.rename(columns = {'issn_link' : 'issn'}) -# cost_factor_ids_2 = cost_factor.loc[cost_factor['issn_electronic'].notna()][['issn_electronic', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_2 = cost_factor_ids_2.rename(columns = {'issn_electronic' : 'issn'}) -# cost_factor_ids_3 = cost_factor.loc[cost_factor['issn_print'].notna()][['issn_print', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_3 = cost_factor_ids_3.rename(columns = {'issn_print' : 'issn'}) -# cost_factor_ids_4 = cost_factor.loc[cost_factor['issn'].notna()][['issn', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids = cost_factor_ids_1.append(cost_factor_ids_2) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_3) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_4) -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
01651-20570001-555511600EURSource: DOAJ
12353-074X0001-625X1400EURSource: DOAJ
21873-62970001-691811500USDSource: DOAJ
32083-94800001-69771520EURSource: DOAJ
42327-97880003-106213500USDSource: DOAJ
.....................
455028756-79381520-603312400EURSource: OpenAPC (2013)
455038756-79381520-603311822.49EURSource: OpenAPC (2014)
455048756-79381520-603311762.69EURSource: OpenAPC (2016)
455058756-79381520-603313248.31EURSource: OpenAPC (2017)
455068756-79381520-603312913.11EURSource: OpenAPC (2019)
-

45507 rows × 6 columns

-
- - - - -```python -# supprimer les doublons et les vides -cost_factor_ids = cost_factor_ids.drop_duplicates(subset=['issnl']) -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
01651-20570001-555511600EURSource: DOAJ
12353-074X0001-625X1400EURSource: DOAJ
21873-62970001-691811500USDSource: DOAJ
32083-94800001-69771520EURSource: DOAJ
42327-97880003-106213500USDSource: DOAJ
.....................
454732690-32022690-32021754.67EURSource: OpenAPC (2019)
454742691-94782691-947811690EURSource: OpenAPC (2020)
454778750-75871522-160112355.13EURSource: OpenAPC (2016)
454818755-12091944-920812627.74EURSource: OpenAPC (2013)
454988756-758X1460-269512725.08EURSource: OpenAPC (2014)
-

24018 rows × 6 columns

-
- - - - -```python -# merge dans l'autre sens pour garder que les lignes du fichier -cost_factor_ids = pd.merge(cost_factor_ids, sherpa[['id', 'issnl']], on='issnl', how='left') -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01651-20570001-555511600EURSource: DOAJNaN
12353-074X0001-625X1400EURSource: DOAJNaN
21873-62970001-691811500USDSource: DOAJNaN
32083-94800001-69771520EURSource: DOAJNaN
42327-97880003-106213500USDSource: DOAJNaN
........................
313972690-32022690-32021754.67EURSource: OpenAPC (2019)NaN
313982691-94782691-947811690EURSource: OpenAPC (2020)NaN
313998750-75871522-160112355.13EURSource: OpenAPC (2016)NaN
314008755-12091944-920812627.74EURSource: OpenAPC (2013)NaN
314018756-758X1460-269512725.08EURSource: OpenAPC (2014)NaN
-

31402 rows × 7 columns

-
- - - - -```python -# garder les lignes avec merge -cost_factor_ids_all = cost_factor_ids.loc[cost_factor_ids['id'].notnull()] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
231083-351X0021-925812500USDSource: DOAJ1369.0
241083-351X0021-925812500USDSource: DOAJ1370.0
251083-351X0021-925812500USDSource: DOAJ1371.0
261083-351X0021-925812500USDSource: DOAJ1372.0
311536-59640025-797411950USDSource: DOAJ2147.0
........................
312972475-99532475-995312023.37EURSource: OpenAPC (2017)8591.0
312982475-99532475-995312023.37EURSource: OpenAPC (2017)8592.0
312992475-99532475-995312023.37EURSource: OpenAPC (2017)8593.0
313002475-99532475-995312023.37EURSource: OpenAPC (2017)8594.0
313012475-99532475-995312023.37EURSource: OpenAPC (2017)8595.0
-

7964 rows × 7 columns

-
- - - - -```python -# supprimer les doublons -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['id']) -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
231083-351X0021-925812500USDSource: DOAJ1369.0
241083-351X0021-925812500USDSource: DOAJ1370.0
251083-351X0021-925812500USDSource: DOAJ1371.0
261083-351X0021-925812500USDSource: DOAJ1372.0
311536-59640025-797411950USDSource: DOAJ2147.0
........................
312972475-99532475-995312023.37EURSource: OpenAPC (2017)8591.0
312982475-99532475-995312023.37EURSource: OpenAPC (2017)8592.0
312992475-99532475-995312023.37EURSource: OpenAPC (2017)8593.0
313002475-99532475-995312023.37EURSource: OpenAPC (2017)8594.0
313012475-99532475-995312023.37EURSource: OpenAPC (2017)8595.0
-

7964 rows × 7 columns

-
- - - - -```python -# supprimer les doublons par issnl -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['issnl']) -del cost_factor_ids_all['id'] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
231083-351X0021-925812500USDSource: DOAJ
311536-59640025-797411950USDSource: DOAJ
2221592-87210390-607812000EURSource: DOAJ
3031555-38920963-689712750USDSource: DOAJ
4021095-95721053-811913000USDSource: DOAJ
.....................
312372469-99262469-992612156.51EURSource: OpenAPC (2015)
312422469-99502469-995012143.51EURSource: OpenAPC (2016)
312482470-00102470-001011763.13EURSource: OpenAPC (2016)
312532470-00452470-004511211.45EURSource: OpenAPC (2016)
312972475-99532475-995312023.37EURSource: OpenAPC (2017)
-

580 rows × 6 columns

-
- - - - -```python -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentcost_factor
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
- - - - -```python -# merge avec la table sherpa -sherpa = pd.merge(sherpa, cost_factor_ids_all[['issnl', 'cost_factor']], on='issnl', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN355.0
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN355.0
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN356.0
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN580.0
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN580.0
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
- - - - -```python -sherpa.loc[sherpa['cost_factor'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
937870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repositoryNaNarXiv ; bioRxiv ; Preprint RepositoryarXiv ; bioRxivNaNauthorsNaNFalseMust be assigned a DOICan not be deposited after submission to journal940002-95131NaNNaN
947870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNinstitutional_repositoryInstitutional RepositoryNaNNaNNaNpublishersNaNTrueMust link to publisher version with DOINaN950002-95132NaNNaN
957870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN960002-95133NaNNaN
967870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueWellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN970002-95133NaNNaN
977870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueMedical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN980002-95133NaNNaN
......................................................................................................
81995651661-81578459https://v2.sherpa.ac.uk/id/publisher_policy/3494noyespublishedcc_by_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; subject_repos...Any WebsitePubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNNaNNaNTruePublished source must be acknowledged with cit...NaN82001661-815738.0NaN
82005651661-81578459https://v2.sherpa.ac.uk/id/publisher_policy/3494noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; subject_repos...Any WebsitePubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNNaNNaNTruePublished source must be acknowledged with cit...NaN82011661-815732.0NaN
83735301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repositoryNaNarXiv ; bioRxiv ; Preprint RepositoryarXiv ; bioRxivNaNNaNNaNFalseMay be considered prior publication, contact j...NaN83741946-62341NaNNaN
83745301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repositoryInstitutional RepositoryAuthor's HomepageNaNNaNauthorsNaNTruePublished source must be acknowledged with DOI...NaN83751946-62342NaNNaN
83755301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonoacceptedNaN6when_required_by_funderNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repositoryNaNFunder Designated Location ; PubMed CentralPubMed CentralNaNauthorsNaNFalseMust state on submission Funding agency requir...NaN83761946-62342NaNNaN
-

631 rows × 33 columns

-
# Keep the APC cost factor only for the published version: every row whose
# article_version is not 'published' gets its cost_factor set to NaN.
not_published = sherpa['article_version'].ne('published')
sherpa.loc[not_published, 'cost_factor'] = np.nan

# Notebook display: the rows that still carry a cost factor.
sherpa[sherpa['cost_factor'].notna()]
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
64980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN70001-484231.0356.0
74980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN80001-484232.0356.0
84980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedbespoke_license0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN90001-484233.0356.0
......................................................................................................
85885332470-004531531https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85892470-004531.0579.0
85895332470-004531531https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85902470-004531.0579.0
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

4462 rows × 33 columns

-
# Rename the 'cost_factor' column of the cost-factor table to 'id'.
cost_factor_ids_all = cost_factor_ids_all.rename(
    {'cost_factor': 'id'},
    axis='columns',
)

# Notebook display of the renamed table.
cost_factor_ids_all
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
# Cast the 'id' column to integers; the other columns keep their dtype.
cost_factor_ids_all = cost_factor_ids_all.astype({'id': int})

# Notebook display of the table after the cast.
cost_factor_ids_all
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
# Columns kept for the exported cost_factor table (issn / issnl are dropped).
cost_factor_export_columns = [
    'id',
    'cost_factor_type',
    'amount',
    'symbol',
    'comment',
]
cost_factor_export = cost_factor_ids_all[cost_factor_export_columns]

# Notebook display of the export table.
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57557612156.51EURSource: OpenAPC (2015)
57657712143.51EURSource: OpenAPC (2016)
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
-

580 rows × 5 columns

-
# Notebook display: number of cost factors before the extra rows are added.
cost_factor_export.shape[0]

# Add the "100 % discount" cost factor used for Read & Publish licences.
# The new id is taken from the highest existing id instead of the row count,
# so it cannot collide with an existing id when the ids are not contiguous.
rpid = 1 if cost_factor_export.empty else int(cost_factor_export['id'].max()) + 1
read_publish_row = pd.DataFrame([{
    'id': rpid,
    'cost_factor_type': 2,
    'amount': 100,
    'symbol': '%',
    'comment': 'Read & Publish agreement',
}])

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# way to add rows and also works on older pandas versions.
cost_factor_export = pd.concat(
    [cost_factor_export, read_publish_row],
    ignore_index=True,
)

# Notebook display of the extended table.
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57657712143.51EURSource: OpenAPC (2016)
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
5805812100%Read & Publish agreement
-

581 rows × 5 columns

-
# Store the id of the Read & Publish cost factor (rpid) in the 'cost_factor'
# column of every row of the Read & Publish table.
rp["cost_factor"] = rpid

# Notebook display of the Read & Publish table.
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicencecost_factor
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31581
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31581
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31581
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31581
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31581
...................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35581
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35581
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35581
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35581
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35581
-

40083 rows × 16 columns

-
# Add the placeholder cost factor "UNKNOWN" (id 999999).
unknown_row = pd.DataFrame([{
    'id': 999999,
    'cost_factor_type': 999999,
    'amount': 0,
    'symbol': '',
    'comment': 'UNKNOWN',
}])

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# way to add rows and also works on older pandas versions.
cost_factor_export = pd.concat(
    [cost_factor_export, unknown_row],
    ignore_index=True,
)

# Notebook display of the final cost_factor table.
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
5805812100%Read & Publish agreement
5819999999999990UNKNOWN
-

582 rows × 5 columns

-
# Export the cost_factor table as pretty-printed UTF-8 JSON. pandas serialises
# the frame to a JSON string first; it is parsed again so that json.dump can
# write it with indentation and without ASCII escaping.
result = cost_factor_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/cost_factor.json', 'w', encoding='utf-8') as json_file:
    json.dump(parsed, json_file, indent=2, ensure_ascii=False)

# Export as TSV. The separator is set explicitly: to_csv writes commas by
# default, which does not match the .tsv extension of the target file.
cost_factor_export.to_csv('sample/cost_factor.tsv', sep='\t', index=False)

# Export as Excel workbook.
cost_factor_export.to_excel('sample/cost_factor.xlsx', index=False)

# --- Table term ---

# Notebook display of the sherpa table the term table is built from.
sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaNNaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaNNaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaNNaN
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaNNaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaNNaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
# Columns of the sherpa table that are carried over into the term table.
term_sherpa_columns = [
    'id',
    'version',
    'cost_factor',
    'embargo',
    'archiving',
    'locations_ir',
    'locations_not_ir',
    'licence',
    'journal',
    'conditions',
    'public_notes',
    'prerequisite_funders',
    'prerequisite_funders_ror',
]
term_sherpa = sherpa[term_sherpa_columns]

# Notebook display of the selection.
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idversioncost_factorembargoarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersprerequisite_funders_ror
011NaN0TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Must acknowledge acceptance for publication ; ...NaNNaNNaN
122NaN12TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Publisher source must be acknowledged with cit...NaNNaNNaN
233355.00TrueAny Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...1.0532Published source must be acknowledgedNaNNaNNaN
343355.00TrueAny Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...2.0532Published source must be acknowledgedNaNNaNNaN
451NaN0FalseNaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNaN
..........................................
859085911NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859185922NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859285933580.00TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859385943580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
859485953580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
-

8595 rows × 13 columns

-
# Rename the sherpa columns: id -> id_sherpa, embargo -> embargo_months,
# prerequisite_funders_ror -> ror.
term_sherpa = term_sherpa.rename(
    {
        'id': 'id_sherpa',
        'embargo': 'embargo_months',
        'prerequisite_funders_ror': 'ror',
    },
    axis='columns',
)

# Notebook display of the renamed table.
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersror
011NaN0TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Must acknowledge acceptance for publication ; ...NaNNaNNaN
122NaN12TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Publisher source must be acknowledged with cit...NaNNaNNaN
233355.00TrueAny Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...1.0532Published source must be acknowledgedNaNNaNNaN
343355.00TrueAny Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...2.0532Published source must be acknowledgedNaNNaNNaN
451NaN0FalseNaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNaN
..........................................
859085911NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859185922NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859285933580.00TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859385943580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
859485953580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
-

8595 rows × 13 columns

-
# Build the 'comment' column from the archiving locations, the conditions and
# the public notes. Only non-empty parts are used and they are joined with
# ' ; ', so a part can neither appear twice nor show up as a bare label
# (the previous step-by-step concatenation produced
# 'Conditions: X ; Conditions: X' for rows without a location text).


def _with_label(label, values):
    """Prefix every non-empty string of *values* with *label*; '' stays ''."""
    return values.where(values == '', label + values)


# Missing text fields become empty strings so they can be tested and joined.
for text_column in ('conditions', 'public_notes', 'locations_not_ir', 'locations_ir'):
    term_sherpa[text_column] = term_sherpa[text_column].fillna('')

# As before, the location columns themselves keep their labelled text.
term_sherpa['locations_not_ir'] = _with_label(
    'Non institutional archiving locations: ', term_sherpa['locations_not_ir'])
term_sherpa['locations_ir'] = _with_label(
    'Institutional archiving locations: ', term_sherpa['locations_ir'])

# Location part: institutional locations when archiving is True,
# non institutional locations when it is False, nothing otherwise.
location_part = pd.Series('', index=term_sherpa.index, dtype=object)
location_part = location_part.mask(
    term_sherpa['archiving'].eq(False), term_sherpa['locations_not_ir'])
location_part = location_part.mask(
    term_sherpa['archiving'].eq(True), term_sherpa['locations_ir'])

conditions_part = _with_label('Conditions: ', term_sherpa['conditions'])
public_notes_part = _with_label('Public notes: ', term_sherpa['public_notes'])

term_sherpa['comment'] = [
    ' ; '.join(part for part in row_parts if part)
    for row_parts in zip(location_part, conditions_part, public_notes_part)
]

# Notebook display of the table with its new 'comment' column.
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersrorcomment
011NaN0TrueInstitutional archiving locations: Non-Commerc...Non institutional archiving locations: Author'...NaN532Must acknowledge acceptance for publication ; ...NaNNaNInstitutional archiving locations: Non-Commerc...
122NaN12TrueInstitutional archiving locations: Non-Commerc...Non institutional archiving locations: Author'...NaN532Publisher source must be acknowledged with cit...NaNNaNInstitutional archiving locations: Non-Commerc...
233355.00TrueInstitutional archiving locations: Any Website...Non institutional archiving locations: PubMed ...1.0532Published source must be acknowledgedNaNNaNInstitutional archiving locations: Any Website...
343355.00TrueInstitutional archiving locations: Any Website...Non institutional archiving locations: PubMed ...2.0532Published source must be acknowledgedNaNNaNInstitutional archiving locations: Any Website...
451NaN0FalseNon institutional archiving locations: ChemRxi...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNon institutional archiving locations: ChemRxi...
.............................................
859085911NaN0TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859185922NaN0TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859285933580.00TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859385943580.00TrueInstitutional archiving locations: Any RepositoryNon institutional archiving locations: Journal...1.0608NaNNaNInstitutional archiving locations: Any Repository
859485953580.00TrueInstitutional archiving locations: Any RepositoryNon institutional archiving locations: Journal...1.0608NaNNaNInstitutional archiving locations: Any Repository
-

8595 rows × 14 columns

-
# Notebook display: frequency of each value of 'prerequisite_funders'
# (missing values are not counted).
term_sherpa['prerequisite_funders'].value_counts(dropna=True)

# Notebook display of the Read & Publish table.
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicencecost_factor
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31581
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31581
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31581
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31581
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31581
...................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35581
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35581
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35581
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35581
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35581
-

40083 rows × 16 columns

-
# Columns of the Read & Publish table that are carried over into the term table.
term_rp_columns = [
    'rp_id',
    'version',
    'archiving',
    'embargo_months',
    'cost_factor',
    'licence',
    'journal',
    'rp_publisher',
    'ror',
    'valid_from',
    'valid_until',
]

# Take an explicit copy: term_rp gets new columns assigned later on, and
# assigning to a plain selection of rp raises pandas' SettingWithCopyWarning.
term_rp = rp[term_rp_columns].copy()

# Notebook display of the selection.
term_rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rp_idversionarchivingembargo_monthscost_factorlicencejournalrp_publisherrorvalid_fromvalid_until
013True05811899.0Elsevierhttps://ror.org/04d8ztx872020-01-012023-12-31
123True05811899.0Elsevierhttps://ror.org/02bnkt3222020-01-012023-12-31
233True05811899.0Elsevierhttps://ror.org/00zg4za482020-01-012023-12-31
343True05811899.0Elsevierhttps://ror.org/02s3760522020-01-012023-12-31
453True05811899.0Elsevierhttps://ror.org/05a28rw582020-01-012023-12-31
....................................
40078400793True605815592.0CUPhttps://ror.org/01swzsf042021-01-012023-12-31
40079400803True605815592.0CUPhttps://ror.org/019whta542021-01-012023-12-31
40080400813True605815592.0CUPhttps://ror.org/00vasag412021-01-012023-12-31
40081400823True605815592.0CUPhttps://ror.org/05r0ap6202021-01-012023-12-31
40082400833True605815592.0CUPhttps://ror.org/05pmsvm272021-01-012023-12-31
-

40083 rows × 11 columns

-
# Notebook display: number of Read & Publish terms per publisher.
term_rp['rp_publisher'].value_counts()

# Work on an explicit copy so that adding the 'comment' column does not raise
# pandas' SettingWithCopyWarning (term_rp is a column selection of rp).
term_rp = term_rp.copy()

# Comment text per publisher code. Publisher codes are matched after
# stripping surrounding whitespace, so 'Springer Nature ' (with a trailing
# space) and 'Springer Nature' are both recognised.
publisher_comments = {
    'Elsevier': 'Elsevier Read & Publish agreement',
    'Wiley': 'Wiley Read & Publish agreement',
    'TF': 'Taylor and Francis Read & Publish agreement',
    'Springer Nature': 'Springer Nature Read & Publish agreement',
    'CUP': 'Cambridge University Press (CUP) Read & Publish agreement. Article types covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports',
}

# Publishers without an entry in the table get a missing comment (NaN).
term_rp['comment'] = term_rp['rp_publisher'].str.strip().map(publisher_comments)

# The publisher code is only needed to build the comment.
del term_rp['rp_publisher']

# Notebook display of the table with its new 'comment' column.
term_rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rp_idversionarchivingembargo_monthscost_factorlicencejournalrorvalid_fromvalid_untilcomment
013True05811899.0https://ror.org/04d8ztx872020-01-012023-12-31Elsevier Read & Publish agreement
123True05811899.0https://ror.org/02bnkt3222020-01-012023-12-31Elsevier Read & Publish agreement
233True05811899.0https://ror.org/00zg4za482020-01-012023-12-31Elsevier Read & Publish agreement
343True05811899.0https://ror.org/02s3760522020-01-012023-12-31Elsevier Read & Publish agreement
453True05811899.0https://ror.org/05a28rw582020-01-012023-12-31Elsevier Read & Publish agreement
....................................
40078400793True605815592.0https://ror.org/01swzsf042021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40079400803True605815592.0https://ror.org/019whta542021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40080400813True605815592.0https://ror.org/00vasag412021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40081400823True605815592.0https://ror.org/05r0ap6202021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40082400833True605815592.0https://ror.org/05pmsvm272021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
-

40083 rows × 11 columns

-
- - - - -```python -# Concatenation of the two tables: start from the Sherpa terms, keeping only the columns listed below -term_orig = term_sherpa[['id_sherpa', 'version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'journal', 'prerequisite_funders', 'ror', 'comment']] -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcomment
011NaN0TrueNaN532NaNNaNInstitutional archiving locations: Non-Commerc...
122NaN12TrueNaN532NaNNaNInstitutional archiving locations: Non-Commerc...
233355.00True1.0532NaNNaNInstitutional archiving locations: Any Website...
343355.00True2.0532NaNNaNInstitutional archiving locations: Any Website...
451NaN0FalseNaN498NaNNaNNon institutional archiving locations: ChemRxi...
.................................
859085911NaN0TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859185922NaN0TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859285933580.00TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859385943580.00True1.0608NaNNaNInstitutional archiving locations: Any Repository
859485953580.00True1.0608NaNNaNInstitutional archiving locations: Any Repository
-

8595 rows × 10 columns

-
- - - - -```python -# Stack the Read & Publish terms under the Sherpa terms with a fresh 0..n-1 index. -# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0. -term_orig = pd.concat([term_orig, term_rp], ignore_index=True, sort=False) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_until
01.01NaN0TrueNaN532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN
12.02NaN12TrueNaN532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN
23.03355.00True1.0532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN
34.03355.00True2.0532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN
45.01NaN0FalseNaN498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN
..........................................
48673NaN3581.060True5.0592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31
48674NaN3581.060True5.0592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31
48675NaN3581.060True5.0592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31
48676NaN3581.060True5.0592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31
48677NaN3581.060True5.0592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31
-

48678 rows × 13 columns

-
- - - - -```python -# Add a content hash identifying each distinct variant of a term (with and without the licence). -# pd.util.hash_pandas_object is used rather than the builtin hash(): builtin string hashes are -# salted per interpreter process, and NaN values are hashed by object identity on Python >= 3.10, -# so hash(tuple(row)) is neither reproducible between runs nor reliable for rows containing NaN. -# The resulting columns hold unsigned 64-bit integers, one per row. -content_columns = ['version', 'cost_factor', 'embargo_months', 'archiving', 'comment'] -content_columns_licence = ['version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'comment'] -term_orig['id_content_hash'] = pd.util.hash_pandas_object(term_orig[content_columns], index=False) -term_orig['id_content_hash_licence'] = pd.util.hash_pandas_object(term_orig[content_columns_licence], index=False) -``` - - -```python -term_orig.sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licence
65996600.02NaN12TrueNaN923.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
68676868.02NaN12TrueNaN957.0Truehttps://ror.org/056bwcz71Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
47504751.02NaN12TrueNaN642.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
82368237.02NaN12TrueNaN640.0Truehttps://ror.org/02wxr8x18Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
82378238.02NaN12TrueNaN640.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
................................................
63536354.03222.00True1.0190.0Truehttps://ror.org/02wdwnk04Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63526353.03222.00True1.0190.0Truehttps://ror.org/029chgv08Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63626363.03222.00True1.0190.0Truehttps://ror.org/0472cxd90Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63576358.03222.00True1.0190.0Truehttps://ror.org/0456r8d26Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63636364.03222.00True1.0190.0Truehttps://ror.org/03x94j517Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
-

48678 rows × 15 columns

-
- - - - -```python -# Duplicates: rows whose id_content_hash already appeared in an earlier row, sorted by hash -term_orig.loc[term_orig.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licence
66076608.02NaN12TrueNaN175.0Truehttps://ror.org/02wxr8x18Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
65086509.02NaN12TrueNaN64.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
12941295.02NaN12TrueNaN342.0Truehttps://ror.org/056bwcz71Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
55615562.02NaN12TrueNaN27.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
55595560.02NaN12TrueNaN27.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
................................................
63556356.03222.00True1.0190.0Truehttps://ror.org/00cwqg982Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63546355.03222.00True1.0190.0Truehttps://ror.org/02jkpm469Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63536354.03222.00True1.0190.0Truehttps://ror.org/02wdwnk04Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63646365.03222.00True1.0190.0Truehttps://ror.org/02gq0fg61Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63596360.03222.00True1.0190.0Truehttps://ror.org/01613vh25Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
-

47358 rows × 15 columns

-
- - - - -```python -# Missing licence / cost_factor values become the sentinel 999999 so both columns can be cast to integers -term_orig['licence'] = term_orig['licence'].fillna(999999).astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].fillna(999999).astype(int) -# term_orig['embargo_months'] = term_orig['embargo_months'].fillna(0) -# term_orig['embargo_months'] = term_orig['embargo_months'].astype(int) -# ir_archiving is 1.0 where archiving is True and 0.0 everywhere else (False or missing) -term_orig['ir_archiving'] = (term_orig['archiving'] == True).astype(float) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121.0
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351.0
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071.0
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071.0
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100.0
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581.0
-

48678 rows × 16 columns

-
- - - - -```python -# Show the rows where ir_archiving is still missing -term_orig[term_orig['ir_archiving'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
-
- - - - -```python -term_orig['ir_archiving'].value_counts() -``` - - - - - 1.0 47467 - 0.0 1211 - Name: ir_archiving, dtype: int64 - - - - -```python -# Cast the three numeric columns to integers in a single call -term_orig = term_orig.astype({'licence': int, 'ir_archiving': int, 'cost_factor': int}) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581
-

48678 rows × 16 columns

-
- - - - -```python -# Keep the hash, institution and validity dates of every term that has at least one validity date -has_validity_date = term_orig['valid_from'].notna() | term_orig['valid_until'].notna() -terms_export_dates = term_orig.loc[has_validity_date, ['id_content_hash', 'ror', 'valid_from', 'valid_until']] -terms_export_dates -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_until
8595-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-31
8596-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-31
8597-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-31
8598-6020029623494903364https://ror.org/02s3760522020-01-012023-12-31
8599-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-31
...............
486737687377827846095855https://ror.org/01swzsf042021-01-012023-12-31
486747687377827846095855https://ror.org/019whta542021-01-012023-12-31
486757687377827846095855https://ror.org/00vasag412021-01-012023-12-31
486767687377827846095855https://ror.org/05r0ap6202021-01-012023-12-31
486777687377827846095855https://ror.org/05pmsvm272021-01-012023-12-31
-

40083 rows × 4 columns

-
- - - - -```python -# Columns kept for the terms export, in output order -terms_export_columns = ['id_sherpa', 'rp_id', 'id_content_hash', 'id_content_hash_licence', 'version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment'] -terms_export = term_orig[terms_export_columns] -terms_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
48673NaN40079.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48674NaN40080.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48675NaN40081.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48676NaN40082.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48677NaN40083.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
-

48678 rows × 10 columns

-
- - - - -```python -# Duplicate check: rows of terms_export whose id_content_hash already appeared in an earlier row -terms_export.loc[terms_export.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
66076608.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
65086509.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
12941295.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
55615562.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
55595560.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
.................................
63556356.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63546355.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63536354.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63646365.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63596360.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
-

47358 rows × 10 columns

-
- - - - -```python -# Keep the first row seen for each content hash -terms_export_dedup = terms_export.drop_duplicates(subset='id_content_hash', keep='first') -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
8595NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
26723NaN18129.0-195526209948827643863594828014331812613581011NaN
33439NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-

1320 rows × 10 columns

-
- - - - -```python -# Keep the first row seen for each (content + licence) hash -terms_export_dedup_licence = terms_export.drop_duplicates(subset='id_content_hash_licence', keep='first') -terms_export_dedup_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
47988NaN39394.07687377827846095855229849776618844805935816014Cambridge University Press (CUP) Read & Publis...
48218NaN39624.07687377827846095855229848607945021166535816012Cambridge University Press (CUP) Read & Publis...
48448NaN39854.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
-

1590 rows × 10 columns

-
- - - - -```python -# Duplicate check: rows of the licence-level table whose id_content_hash already appeared in an earlier row -terms_export_dedup_licence.loc[terms_export_dedup_licence.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
15691570.0NaN-9114006443623277513-72733887763620604913413002Non institutional archiving locations: PubMed ...
582583.0NaN-9011072484834895623-59116051124023388893379012Institutional archiving locations: Any Reposit...
85538554.0NaN-886163005461322845471767730880766240153573003Non institutional archiving locations: Funder ...
85528553.0NaN-886163005461322845471767734743964336903573002Non institutional archiving locations: Funder ...
82648265.0NaN-8856152899298491735-12199961119101615613560014Institutional archiving locations: Non-Commerc...
.................................
85608561.0NaN873544693264154295143320462503649956953574002Non institutional archiving locations: Funder ...
85618562.0NaN873544693264154295143320481179378659783574003Non institutional archiving locations: Funder ...
22222223.0NaN87452533838935247195211347029898937223431011Institutional archiving locations: Any Website...
41524153.0NaN884524375673695509861001134560954228313464011Institutional archiving locations: Any Website...
43514352.0NaN9036026380223066491-15394902416656550363470011Institutional archiving locations: Institution...
-

270 rows × 10 columns

-
- - - - -```python -# Totals for the two sources: number of deduplicated terms that carry a Sherpa id -len(terms_export_dedup[terms_export_dedup['id_sherpa'].notna()]) -``` - - - - - 1315 - - - - -```python -# Number of deduplicated terms that carry a Read & Publish id -len(terms_export_dedup[terms_export_dedup['rp_id'].notna()]) -``` - - - - - 5 - - - - -```python -# The deduplicated Read & Publish terms themselves -terms_export_dedup[terms_export_dedup['rp_id'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
8595NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
26723NaN18129.0-195526209948827643863594828014331812613581011NaN
33439NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-
- - - - -```python -# Renumber the rows 0..n-1 so the index can serve as the basis for the id. -# drop=True discards the old index directly instead of materialising an 'index' column and deleting it, -# and rebinding to the returned frame means later column assignments no longer write into a slice of terms_export. -terms_export_dedup = terms_export_dedup.reset_index(drop=True) -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
1316NaN18129.0-195526209948827643863594828014331812613581011NaN
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-

1320 rows × 10 columns

-
- - - - -```python -# Add the id column, computed as the row index + 1 -terms_export_dedup['id'] = terms_export_dedup.index + 1 -# del terms_export_dedup['index'] -terms_export_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentid
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
....................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 11 columns

-
- - - - -```python -# Add a 'source' column holding an empty string on every row -terms_export_dedup['source'] = '' -terms_export_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """Entry point for launching an IPython kernel. - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsource
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
.......................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 12 columns

-
- - - - -```python -# grouper par licence -terms_export_dedup_licences = terms_export_dedup_licence[['licence', 'id_content_hash']] -terms_export_dedup_licences -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licenceid_content_hash
0999999-5068777248818105392
1999999-1187146317861229577
21-6827815856646016670
325388365857945903435
4999999-2781821769548802966
.........
4734416747956201225830719
4775817687377827846095855
4798847687377827846095855
4821827687377827846095855
4844857687377827846095855
-

1590 rows × 2 columns

-
- - - - -```python -# concat valeurs avec même id -terms_export_dedup_licences['licence'] = terms_export_dedup_licences['licence'].astype(str) -terms_export_dedup_licences = terms_export_dedup_licences.groupby('id_content_hash').agg({'licence': lambda x: ', '.join(x)}) -terms_export_dedup_licences -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licence
id_content_hash
-9213354388875732238999999
-9200070744422558377999999
-91717831170231043951
-91349526464689481631
-91330136487514062891
......
91950013304323528931
92004661683459815431
92138788081787292532
92183892089127778822
92190452160970746911
-

1320 rows × 1 columns

-
- - - - -```python -# test des valeur multiples -terms_export_dedup_licences.loc[terms_export_dedup_licences['licence'].str.contains(',')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licence
id_content_hash
-91140064436232775131, 2
-90110724848348956231, 2
-88616300546132284541, 2, 3
-88561528992984917351, 4
-86071675687205191891, 4
......
87121617774363853901, 4
87354469326415429511, 2, 3
87452533838935247192, 1
88452437567369550982, 1
90360263802230664912, 1
-

185 rows × 1 columns

-
- - - - -```python -# ajout des licences groupées -terms_export_dedup_fin = pd.merge(terms_export_dedup, terms_export_dedup_licences, on='id_content_hash', how='left') -terms_export_dedup_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicence_xcommentidsourcelicence_y
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1999999
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2999999
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...31
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...42
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5999999
..........................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement13161, 2
1316NaN18129.0-195526209948827643863594828014331812613581011NaN13171, 4
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement13181, 4, 2
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201, 4, 2, 5
-

1320 rows × 13 columns

-
- - - - -```python -# merge avec les dates pour avoir les terms ids -terms_export_dates = pd.merge(terms_export_dates, terms_export_dedup_fin[['id_content_hash', 'id']], on='id_content_hash') -terms_export_dates = terms_export_dates.rename(columns = {'id' : 'term'}) -terms_export_dates -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_untilterm
0-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-311316
1-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-311316
2-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-311316
3-6020029623494903364https://ror.org/02s3760522020-01-012023-12-311316
4-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-311316
..................
400787687377827846095855https://ror.org/01swzsf042021-01-012023-12-311320
400797687377827846095855https://ror.org/019whta542021-01-012023-12-311320
400807687377827846095855https://ror.org/00vasag412021-01-012023-12-311320
400817687377827846095855https://ror.org/05r0ap6202021-01-012023-12-311320
400827687377827846095855https://ror.org/05pmsvm272021-01-012023-12-311320
-

40083 rows × 5 columns

-
- - - - -```python -# renommer les champs de licence -del terms_export_dedup_fin['licence_x'] -terms_export_dedup_fin = terms_export_dedup_fin.rename(columns = {'licence_y' : 'licence'}) -``` - - -```python -terms_export_fin = terms_export_dedup_fin[['version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment', 'id', 'source']] -terms_export_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
versioncost_factorembargo_monthsir_archivinglicencecommentidsource
0199999901999999Institutional archiving locations: Non-Commerc...1
12999999121999999Institutional archiving locations: Non-Commerc...2
23355011Institutional archiving locations: Any Website...3
33355012Institutional archiving locations: Any Website...4
4199999900999999Non institutional archiving locations: ChemRxi...5
...........................
13153581011, 2Elsevier Read & Publish agreement1316
13163581011, 4NaN1317
13173581011, 4, 2Wiley Read & Publish agreement1318
13183581011Taylor and Francis Read & Publish agreement1319
131935816011, 4, 2, 5Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 8 columns

-
- - - - -```python -# export de la table -result = terms_export_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/term.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -terms_export_fin.to_csv('sample/term.tsv', index=False) -``` - - -```python -# export excel -terms_export_fin.to_excel('sample/term.xlsx', index=False) -``` - -## Table condition_type - - -```python -# Journal-only, Organization-only, Journal-organization agreement -col_names = ['id', - 'condition_issuer' - ] -condition_type = pd.DataFrame(columns = col_names) -condition_type = condition_type.append({'id' : 1, 'condition_issuer' : 'Journal-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 2, 'condition_issuer' : 'Organization-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 3, 'condition_issuer' : 'Journal-organization agreement'}, ignore_index=True) -condition_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idcondition_issuer
01Journal-only
12Organization-only
23Journal-organization agreement
-
- - - - -```python -# export de la table -result = condition_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -condition_type.to_csv('sample/condition_type.tsv', index=False) -``` - - -```python -# export excel -condition_type.to_excel('sample/condition_type.xlsx', index=False) -``` - -## Table organization - - -```python -# extraction des organizations (funders) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaNNaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaNNaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaNNaN
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaNNaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaNNaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
- - - - -```python -sherpa.loc[sherpa['prerequisite_funders'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
167890001-49664049https://v2.sherpa.ac.uk/id/publisher_policy/126nonopublishedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNNaNdisciplinary (PubMed Central) ;FalseNaNNaN170001-49663NaN357.0
286680002-07291334https://v2.sherpa.ac.uk/id/publisher_policy/1107nonoacceptedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNNaNdisciplinary (PubMed Central) ;FalseNaNNaN290002-07292NaNNaN
589850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueWellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN590002-934331.0223.0
599850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueBritish Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN600002-934331.0223.0
609850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueVersus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN610002-934331.0223.0
......................................................................................................
85109902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueEuropean Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85112211-285531.0352.0
85119902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueMedical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85122211-285531.0352.0
85129902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueMotor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85132211-285531.0352.0
85139902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueParkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85142211-285531.0352.0
85149902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueTelethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85152211-285531.0352.0
-

5585 rows × 33 columns

-
- - - - -```python -sherpa['prerequisite_funders'].value_counts() -``` - - - - - True 5585 - Name: prerequisite_funders, dtype: int64 - - - - -```python -funders = sherpa.loc[sherpa['prerequisite_funders'].notna()][['prerequisite_funders_name', 'prerequisite_funders_fundref', 'prerequisite_funders_ror', 'prerequisite_funders_country', 'prerequisite_funders_url', 'prerequisite_funders_sherpa_id']] -funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
28National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
.....................
8510European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
8511Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
8512Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
8513Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
8514Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
-

5585 rows × 6 columns

-
- - - - -```python -funders_dedup = funders.drop_duplicates(subset='prerequisite_funders_ror') -funders_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
61Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982gbhttp://www.bbsrc.ac.uk/home/home.aspx709.0
62Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80gbhttps://bloodcancer.org.uk/925.0
63Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26ushttp://www.gatesfoundation.org/961.0
64Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67gbhttp://www.cancerresearchuk.org/19.0
65Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25gbhttp://www.cso.scot.nhs.uk/16.0
66Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08gbhttp://www.dh.gov.uk/en/index.htm943.0
67Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15gbhttps://dunhillmedical.org.uk/410.0
68European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
69Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
70Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
71Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
72Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
99Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90ushttp://www.hhmi.org/24.0
149Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554gbhttp://www.ahrc.ac.uk/Pages/Home.aspx698.0
150Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58athttp://www.fwf.ac.at/en/13.0
153Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63gbhttp://breastcancernow.org/1065.0
156Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842gbhttp://www.epsrc.ac.uk/Pages/default.aspx722.0
159Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509gbhttps://nerc.ukri.org/726.0
162Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61gbhttp://www.stfc.ac.uk/716.0
164Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063sehttp://www.vr.se/302.0
165World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185chhttp://www.who.int/903.0
166World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04ushttp://www.worldbank.org/525.0
167Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97gbhttp://www.yorkshirecancerresearch.org.uk/428.0
169Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308gbhttp://www.esrc.ac.uk/717.0
418Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18gbhttp://www.hefce.ac.uk/877.0
419Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79gbhttp://www.hefcw.ac.uk/home/home.aspx881.0
420Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71gbhttp://www.sfc.ac.uk/887.0
421Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194gbhttps://www.economy-ni.gov.uk/884.0
960Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37fihttps://www.aka.fi/en/1248.0
961Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17frhttp://www.agence-nationale-recherche.fr/30.0
963Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58pthttp://www.fct.pt/1109.0
964Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45sehttp://www.formas.se/452.0
967Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67nlhttp://www.nwo.nl/459.0
968Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38iehttp://www.sfi.ie/210.0
970Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149nohttps://www.forskningsradet.no/en/266.0
971Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06sehttp://www.forte.se/455.0
978Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68gbhttps://www.gov.uk/government/organisations/in...1267.0
1048Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017gbhttp://www.diabetes.org.uk/492.0
1052Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10gbhttp://www.mariecurie.org.uk/595.0
1055Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410gbhttp://www.actiononhearingloss.org.uk/412.0
1056Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90gbhttp://alzheimers.org.uk/443.0
1063Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81gbhttp://www.mssociety.org.uk/745.0
1064Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613gbhttp://www.myrovlytistrust.org/858.0
1065National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036gbhttp://www.nc3rs.org.uk/859.0
1072Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57gbhttp://www.worldwidecancerresearch.org/425.0
2219Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45cahttp://www.cihr-irsc.gc.ca/28.0
5490US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27ushttp://energy.gov/962.0
5491Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59ushttp://www.ahrq.gov/index.html981.0
5492Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085ushttp://ies.ed.gov/291.0
5493National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80ushttp://science.nasa.gov/986.0
5494National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62ushttp://www.nsf.gov/354.0
7232Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88gbhttps://acmedsci.ac.uk/1125.0
7239Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329gbhttp://prostatecanceruk.org/742.0
7240Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13chhttp://www.snf.ch/de/Seiten/default.aspx25.0
-
- - - - -```python -funders_dedup.shape[0] -``` - - - - - 58 - - - - -```python -# export excel -funders_dedup.to_excel('sample/funders.xlsx', index=False) -``` - - -```python -# export csv -funders_dedup.to_csv('sample/funders.tsv', index=False) -``` - - -```python -# creation du DF -organization_funders = funders_dedup -organization_funders = organization_funders.rename(columns = {'prerequisite_funders_name' : 'name', - 'prerequisite_funders_fundref' : 'fundref', - 'prerequisite_funders_ror' : 'ror', - 'prerequisite_funders_country' : 'iso_code', - 'prerequisite_funders_url' : 'website', - 'prerequisite_funders_sherpa_id' : 'sherpa_id' - }) -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
61Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982gbhttp://www.bbsrc.ac.uk/home/home.aspx709.0
62Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80gbhttps://bloodcancer.org.uk/925.0
63Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26ushttp://www.gatesfoundation.org/961.0
64Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67gbhttp://www.cancerresearchuk.org/19.0
65Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25gbhttp://www.cso.scot.nhs.uk/16.0
66Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08gbhttp://www.dh.gov.uk/en/index.htm943.0
67Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15gbhttps://dunhillmedical.org.uk/410.0
68European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
69Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
70Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
71Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
72Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
99Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90ushttp://www.hhmi.org/24.0
149Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554gbhttp://www.ahrc.ac.uk/Pages/Home.aspx698.0
150Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58athttp://www.fwf.ac.at/en/13.0
153Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63gbhttp://breastcancernow.org/1065.0
156Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842gbhttp://www.epsrc.ac.uk/Pages/default.aspx722.0
159Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509gbhttps://nerc.ukri.org/726.0
162Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61gbhttp://www.stfc.ac.uk/716.0
164Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063sehttp://www.vr.se/302.0
165World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185chhttp://www.who.int/903.0
166World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04ushttp://www.worldbank.org/525.0
167Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97gbhttp://www.yorkshirecancerresearch.org.uk/428.0
169Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308gbhttp://www.esrc.ac.uk/717.0
418Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18gbhttp://www.hefce.ac.uk/877.0
419Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79gbhttp://www.hefcw.ac.uk/home/home.aspx881.0
420Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71gbhttp://www.sfc.ac.uk/887.0
421Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194gbhttps://www.economy-ni.gov.uk/884.0
960Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37fihttps://www.aka.fi/en/1248.0
961Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17frhttp://www.agence-nationale-recherche.fr/30.0
963Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58pthttp://www.fct.pt/1109.0
964Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45sehttp://www.formas.se/452.0
967Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67nlhttp://www.nwo.nl/459.0
968Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38iehttp://www.sfi.ie/210.0
970Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149nohttps://www.forskningsradet.no/en/266.0
971Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06sehttp://www.forte.se/455.0
978Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68gbhttps://www.gov.uk/government/organisations/in...1267.0
1048Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017gbhttp://www.diabetes.org.uk/492.0
1052Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10gbhttp://www.mariecurie.org.uk/595.0
1055Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410gbhttp://www.actiononhearingloss.org.uk/412.0
1056Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90gbhttp://alzheimers.org.uk/443.0
1063Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81gbhttp://www.mssociety.org.uk/745.0
1064Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613gbhttp://www.myrovlytistrust.org/858.0
1065National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036gbhttp://www.nc3rs.org.uk/859.0
1072Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57gbhttp://www.worldwidecancerresearch.org/425.0
2219Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45cahttp://www.cihr-irsc.gc.ca/28.0
5490US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27ushttp://energy.gov/962.0
5491Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59ushttp://www.ahrq.gov/index.html981.0
5492Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085ushttp://ies.ed.gov/291.0
5493National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80ushttp://science.nasa.gov/986.0
5494National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62ushttp://www.nsf.gov/354.0
7232Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88gbhttps://acmedsci.ac.uk/1125.0
7239Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329gbhttp://prostatecanceruk.org/742.0
7240Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13chhttp://www.snf.ch/de/Seiten/default.aspx25.0
-
- - - - -```python -# lien avec les pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# merge avec les pays -organization_funders['iso_code'] = organization_funders['iso_code'].str.upper() -organization_funders['is_funder'] = 1 -organization_funders = pd.merge(organization_funders, country[['iso_code', 'id']], how='left', on='iso_code') -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_idis_funderid
0National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88UShttp://www.nih.gov/9.01236
1Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08GBhttp://www.wellcome.ac.uk/695.01234
2British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04GBhttp://www.bhf.org.uk/18.01234
3Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469GBhttps://www.versusarthritis.org/14.01234
4Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982GBhttp://www.bbsrc.ac.uk/home/home.aspx709.01234
5Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80GBhttps://bloodcancer.org.uk/925.01234
6Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26UShttp://www.gatesfoundation.org/961.01236
7Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67GBhttp://www.cancerresearchuk.org/19.01234
8Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25GBhttp://www.cso.scot.nhs.uk/16.01234
9Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08GBhttp://www.dh.gov.uk/en/index.htm943.01234
10Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15GBhttps://dunhillmedical.org.uk/410.01234
11European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90BEhttp://erc.europa.eu/31.0121
12Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517GBhttp://www.mrc.ac.uk/index.htm705.01234
13Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61GBhttp://www.mndassociation.org/562.01234
14Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338GBhttp://www.parkinsons.org.uk/411.01234
15Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18IThttps://www.telethon.it/en/325.01110
16Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90UShttp://www.hhmi.org/24.01236
17Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554GBhttp://www.ahrc.ac.uk/Pages/Home.aspx698.01234
18Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58AThttp://www.fwf.ac.at/en/13.0114
19Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63GBhttp://breastcancernow.org/1065.01234
20Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842GBhttp://www.epsrc.ac.uk/Pages/default.aspx722.01234
21Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509GBhttps://nerc.ukri.org/726.01234
22Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61GBhttp://www.stfc.ac.uk/716.01234
23Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063SEhttp://www.vr.se/302.01214
24World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185CHhttp://www.who.int/903.01215
25World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04UShttp://www.worldbank.org/525.01236
26Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97GBhttp://www.yorkshirecancerresearch.org.uk/428.01234
27Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308GBhttp://www.esrc.ac.uk/717.01234
28Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18GBhttp://www.hefce.ac.uk/877.01234
29Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79GBhttp://www.hefcw.ac.uk/home/home.aspx881.01234
30Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71GBhttp://www.sfc.ac.uk/887.01234
31Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194GBhttps://www.economy-ni.gov.uk/884.01234
32Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37FIhttps://www.aka.fi/en/1248.0175
33Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17FRhttp://www.agence-nationale-recherche.fr/30.0176
34Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58PThttp://www.fct.pt/1109.01178
35Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45SEhttp://www.formas.se/452.01214
36Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67NLhttp://www.nwo.nl/459.01156
37Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38IEhttp://www.sfi.ie/210.01107
38Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149NOhttps://www.forskningsradet.no/en/266.01166
39Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06SEhttp://www.forte.se/455.01214
40Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68GBhttps://www.gov.uk/government/organisations/in...1267.01234
41Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017GBhttp://www.diabetes.org.uk/492.01234
42Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10GBhttp://www.mariecurie.org.uk/595.01234
43Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410GBhttp://www.actiononhearingloss.org.uk/412.01234
44Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90GBhttp://alzheimers.org.uk/443.01234
45Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81GBhttp://www.mssociety.org.uk/745.01234
46Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613GBhttp://www.myrovlytistrust.org/858.01234
47National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036GBhttp://www.nc3rs.org.uk/859.01234
48Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57GBhttp://www.worldwidecancerresearch.org/425.01234
49Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45CAhttp://www.cihr-irsc.gc.ca/28.0140
50US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27UShttp://energy.gov/962.01236
51Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59UShttp://www.ahrq.gov/index.html981.01236
52Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085UShttp://ies.ed.gov/291.01236
53National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80UShttp://science.nasa.gov/986.01236
54National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62UShttp://www.nsf.gov/354.01236
55Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88GBhttps://acmedsci.ac.uk/1125.01234
56Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329GBhttp://prostatecanceruk.org/742.01234
57Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13CHhttp://www.snf.ch/de/Seiten/default.aspx25.01215
-
- - - - -```python -organization_funders = organization_funders.rename(columns = {'id' : 'country'}) -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_idis_fundercountry
0National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88UShttp://www.nih.gov/9.01236
1Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08GBhttp://www.wellcome.ac.uk/695.01234
2British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04GBhttp://www.bhf.org.uk/18.01234
3Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469GBhttps://www.versusarthritis.org/14.01234
4Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982GBhttp://www.bbsrc.ac.uk/home/home.aspx709.01234
5Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80GBhttps://bloodcancer.org.uk/925.01234
6Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26UShttp://www.gatesfoundation.org/961.01236
7Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67GBhttp://www.cancerresearchuk.org/19.01234
8Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25GBhttp://www.cso.scot.nhs.uk/16.01234
9Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08GBhttp://www.dh.gov.uk/en/index.htm943.01234
10Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15GBhttps://dunhillmedical.org.uk/410.01234
11European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90BEhttp://erc.europa.eu/31.0121
12Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517GBhttp://www.mrc.ac.uk/index.htm705.01234
13Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61GBhttp://www.mndassociation.org/562.01234
14Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338GBhttp://www.parkinsons.org.uk/411.01234
15Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18IThttps://www.telethon.it/en/325.01110
16Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90UShttp://www.hhmi.org/24.01236
17Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554GBhttp://www.ahrc.ac.uk/Pages/Home.aspx698.01234
18Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58AThttp://www.fwf.ac.at/en/13.0114
19Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63GBhttp://breastcancernow.org/1065.01234
20Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842GBhttp://www.epsrc.ac.uk/Pages/default.aspx722.01234
21Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509GBhttps://nerc.ukri.org/726.01234
22Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61GBhttp://www.stfc.ac.uk/716.01234
23Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063SEhttp://www.vr.se/302.01214
24World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185CHhttp://www.who.int/903.01215
25World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04UShttp://www.worldbank.org/525.01236
26Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97GBhttp://www.yorkshirecancerresearch.org.uk/428.01234
27Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308GBhttp://www.esrc.ac.uk/717.01234
28Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18GBhttp://www.hefce.ac.uk/877.01234
29Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79GBhttp://www.hefcw.ac.uk/home/home.aspx881.01234
30Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71GBhttp://www.sfc.ac.uk/887.01234
31Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194GBhttps://www.economy-ni.gov.uk/884.01234
32Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37FIhttps://www.aka.fi/en/1248.0175
33Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17FRhttp://www.agence-nationale-recherche.fr/30.0176
34Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58PThttp://www.fct.pt/1109.01178
35Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45SEhttp://www.formas.se/452.01214
36Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67NLhttp://www.nwo.nl/459.01156
37Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38IEhttp://www.sfi.ie/210.01107
38Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149NOhttps://www.forskningsradet.no/en/266.01166
39Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06SEhttp://www.forte.se/455.01214
40Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68GBhttps://www.gov.uk/government/organisations/in...1267.01234
41Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017GBhttp://www.diabetes.org.uk/492.01234
42Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10GBhttp://www.mariecurie.org.uk/595.01234
43Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410GBhttp://www.actiononhearingloss.org.uk/412.01234
44Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90GBhttp://alzheimers.org.uk/443.01234
45Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81GBhttp://www.mssociety.org.uk/745.01234
46Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613GBhttp://www.myrovlytistrust.org/858.01234
47National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036GBhttp://www.nc3rs.org.uk/859.01234
48Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57GBhttp://www.worldwidecancerresearch.org/425.01234
49Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45CAhttp://www.cihr-irsc.gc.ca/28.0140
50US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27UShttp://energy.gov/962.01236
51Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59UShttp://www.ahrq.gov/index.html981.01236
52Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085UShttp://ies.ed.gov/291.01236
53National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80UShttp://science.nasa.gov/986.01236
54National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62UShttp://www.nsf.gov/354.01236
55Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88GBhttps://acmedsci.ac.uk/1125.01234
56Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329GBhttp://prostatecanceruk.org/742.01234
57Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13CHhttp://www.snf.ch/de/Seiten/default.aspx25.01215
-
- - - - -```python -# ajout des organizations suisses -organization = pd.read_csv('ror/ror_ch_hei_export.tsv', encoding='utf-8', header=0, sep='\t', dtype={'fundref': str, 'orgref': str}, na_filter=False) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
1https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
2https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
3https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
4https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
5https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
6https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
7https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
8https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
9https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
10https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
18https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
19https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
23https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
24https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
25https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
26https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
27https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
28https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
29https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
30https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
31https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
32https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
33https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
34https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
35https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
36https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
37https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
38https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
39https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
40https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
41https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
42https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
43https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
44https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
45https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
-
- - - - -```python -# tri par nom -organization = organization.sort_values(by='name') -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
28https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
27https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
23https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
26https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
2https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
3https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
5https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
6https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
7https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
9https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
19https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
22https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
45https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
25https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
37https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
39https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
1https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
4https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
38https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
10https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
0https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
40https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
41https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
30https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
31https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
32https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
42https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
43https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
33https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
44https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
18https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
36https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
29https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
35https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
8https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
15https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
34https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
24https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
1https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
2https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
3https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
4https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
5https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
6https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
7https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
8https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
9https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
10https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
11https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
12https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
13https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
14https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
15https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
16https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
17https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
18https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
19https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
20https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
21https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
22https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
23https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
24https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
25https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
26https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
27https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
28https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
29https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
30https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
31https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
32https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 32 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
32https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
0https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
1https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
2https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
3https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
4https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
5https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
6https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
7https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
8https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
9https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
10https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
11https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
12https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
13https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
14https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
15https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
16https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
17https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
18https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
19https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
20https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
21https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
22https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
23https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
24https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
25https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
26https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
27https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
28https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
29https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
30https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
31https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
1https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
2https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
3https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
4https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
5https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
6https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
7https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
8https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
9https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
10https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
11https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
12https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
16https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
17https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
18https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
19https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
23https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
24https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
25https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
26https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
27https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
28https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
29https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
30https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
31https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
32https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 45 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
0https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
1https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
2https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
3https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
4https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
5https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
6https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
7https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
8https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
9https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
10https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
11https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
12https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
16https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
17https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
18https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
19https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
23https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
24https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
25https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
26https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
27https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
28https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
29https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
30https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
31https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
32https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
1https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
2https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
3https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
4https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
5https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
6https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
7https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
8https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
9https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
10https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
13https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
14https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
15https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
18https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
19https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
20https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
21https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
22https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
23https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
24https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
25https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
26https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
27https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
28https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
29https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
30https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
31https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
32https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
33https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
34https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
35https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
36https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
37https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
38https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
39https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
40https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
41https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
42https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
43https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
44https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
45https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
-
- - - - -```python -# Add the funders: stack organization_funders below the institutions and renumber the rows. -# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0. -organization = pd.concat([organization, organization_funders], ignore_index=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
acronymaliasescountryfundrefgridis_funderisniiso_codelabel_delabel_enlabel_frlabel_itnameorgrefrorsherpa_idstarting_yearwebsitewikidata
0EPFL215501100001703grid.5333.600000000121839049NaNSwiss Federal Institute of Technology in LausanneÉcole Polytechnique Fédérale de Lausanne (EPFL)71968https://ror.org/02s376052NaN1853http://www.epfl.ch/index.en.htmlQ262760
1UNIGESchola Genevensis215501100006389grid.8591.500000 0001 2322 4988NaNUniversité de GenèveUniversità di GinevraUniversity of Geneva (UNIGE)342348https://ror.org/01swzsf04NaN1559https://www.unige.ch/Q503473
2215grid.417771.300000 0004 4681 910XNaNAgroscopehttps://ror.org/04d8ztx87NaN1850https://www.agroscope.admin.ch/agroscope/en/ho...Q397466
3BFH215501100006259grid.424060.400000 0001 0688 6779NaNBerner FachhochschuleHaute école spécialisée bernoiseBern University of Applied Sciences (BFH)4365265https://ror.org/02bnkt322NaN1997http://www.bfh.ch/en/home.htmlQ466455
4ETH ZurichSwiss Federal Institute of Technology in Zuric...215501100003006grid.5801.c00000 0001 2156 2780NaNEidgenössische Technische Hochschule ZürichÉcole Polytechnique Fédérale de ZurichPolitecnico federale di ZurigoETH Zurich (ETH Zurich)210910https://ror.org/05a28rw58NaN1855https://www.ethz.ch/en.htmlQ11942
............................................................
99NaNNaN236http://dx.doi.org/10.13039/100000104NaN1NaNUSNaNNaNNaNNaNNational Aeronautics and Space Administration ...NaNhttps://ror.org/027ka1x80986.0NaNhttp://science.nasa.gov/NaN
100NaNNaN236http://dx.doi.org/10.13039/100000001NaN1NaNUSNaNNaNNaNNaNNational Science Foundation (NSF)NaNhttps://ror.org/021nxhr62354.0NaNhttp://www.nsf.gov/NaN
101NaNNaN234http://dx.doi.org/10.13039/501100000691NaN1NaNGBNaNNaNNaNNaNAcademy of Medical ScienceNaNhttps://ror.org/00c489v881125.0NaNhttps://acmedsci.ac.uk/NaN
102NaNNaN234http://dx.doi.org/10.13039/501100000771NaN1NaNGBNaNNaNNaNNaNProstate Cancer UKNaNhttps://ror.org/04dkv6329742.0NaNhttp://prostatecanceruk.org/NaN
103NaNNaN215http://dx.doi.org/10.13039/501100001711NaN1NaNCHNaNNaNNaNNaNSchweizerischer Nationalfonds zur Förderung de...NaNhttps://ror.org/00yjd3n1325.0NaNhttp://www.snf.ch/de/Seiten/default.aspxNaN
-

104 rows × 19 columns

-
- - - - -```python -# Reduce the fundref value to the bare funder id: the DOI URL form only resolves to JSON. -# Current URL: http://data.crossref.org/fundingdata/funder/10.13039/[fundref id] -# e.g. http://dx.doi.org/10.13039/501100007903 -# redirects to: http://data.crossref.org/fundingdata/funder/10.13039/501100007903 -# URL of the funded publications: https://search.crossref.org/funding?q=[fundref id]&from_ui=yes -# e.g. https://search.crossref.org/funding?q=501100003006&from_ui=yes -# regex=False: the prefix is a literal string; as a regex its dots would match any character, -# and the default of the regex argument differs between pandas versions. -organization['fundref'] = organization['fundref'].str.replace('http://dx.doi.org/10.13039/', '', regex=False) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
acronymaliasescountryfundrefgridis_funderisniiso_codelabel_delabel_enlabel_frlabel_itnameorgrefrorsherpa_idstarting_yearwebsitewikidata
0EPFL215501100001703grid.5333.600000000121839049NaNSwiss Federal Institute of Technology in LausanneÉcole Polytechnique Fédérale de Lausanne (EPFL)71968https://ror.org/02s376052NaN1853http://www.epfl.ch/index.en.htmlQ262760
1UNIGESchola Genevensis215501100006389grid.8591.500000 0001 2322 4988NaNUniversité de GenèveUniversità di GinevraUniversity of Geneva (UNIGE)342348https://ror.org/01swzsf04NaN1559https://www.unige.ch/Q503473
2215grid.417771.300000 0004 4681 910XNaNAgroscopehttps://ror.org/04d8ztx87NaN1850https://www.agroscope.admin.ch/agroscope/en/ho...Q397466
3BFH215501100006259grid.424060.400000 0001 0688 6779NaNBerner FachhochschuleHaute école spécialisée bernoiseBern University of Applied Sciences (BFH)4365265https://ror.org/02bnkt322NaN1997http://www.bfh.ch/en/home.htmlQ466455
4ETH ZurichSwiss Federal Institute of Technology in Zuric...215501100003006grid.5801.c00000 0001 2156 2780NaNEidgenössische Technische Hochschule ZürichÉcole Polytechnique Fédérale de ZurichPolitecnico federale di ZurigoETH Zurich (ETH Zurich)210910https://ror.org/05a28rw58NaN1855https://www.ethz.ch/en.htmlQ11942
............................................................
99NaNNaN236100000104NaN1NaNUSNaNNaNNaNNaNNational Aeronautics and Space Administration ...NaNhttps://ror.org/027ka1x80986.0NaNhttp://science.nasa.gov/NaN
100NaNNaN236100000001NaN1NaNUSNaNNaNNaNNaNNational Science Foundation (NSF)NaNhttps://ror.org/021nxhr62354.0NaNhttp://www.nsf.gov/NaN
101NaNNaN234501100000691NaN1NaNGBNaNNaNNaNNaNAcademy of Medical ScienceNaNhttps://ror.org/00c489v881125.0NaNhttps://acmedsci.ac.uk/NaN
102NaNNaN234501100000771NaN1NaNGBNaNNaNNaNNaNProstate Cancer UKNaNhttps://ror.org/04dkv6329742.0NaNhttp://prostatecanceruk.org/NaN
103NaNNaN215501100001711NaN1NaNCHNaNNaNNaNNaNSchweizerischer Nationalfonds zur Förderung de...NaNhttps://ror.org/00yjd3n1325.0NaNhttp://www.snf.ch/de/Seiten/default.aspxNaN
-

104 rows × 19 columns

-
- - - - -```python -# DataFrame used for the export: keep only the exported columns. -# .copy() makes it an independent frame, so later column assignments on organization_export -# do not raise SettingWithCopyWarning. -organization_export = organization[['name', 'website', 'country', 'starting_year', 'is_funder', 'ror', 'fundref']].copy() -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundref
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s376052501100001703
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf04501100006389
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx87
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt322501100006259
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw58501100003006
........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/236NaN1https://ror.org/027ka1x80100000104
100National Science Foundation (NSF)http://www.nsf.gov/236NaN1https://ror.org/021nxhr62100000001
101Academy of Medical Sciencehttps://acmedsci.ac.uk/234NaN1https://ror.org/00c489v88501100000691
102Prostate Cancer UKhttp://prostatecanceruk.org/234NaN1https://ror.org/04dkv6329501100000771
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx215NaN1https://ror.org/00yjd3n13501100001711
-

104 rows × 7 columns

-
- - - - -```python -# Fill missing values: 0 for starting_year, empty string for fundref and ror. -# A single fillna() call returns a new DataFrame, so nothing is written into the column -# selection taken from organization (the cause of SettingWithCopyWarning). -organization_export = organization_export.fillna({'starting_year': 0, 'fundref': '', 'ror': ''}) -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundref
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s376052501100001703
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf04501100006389
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx87
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt322501100006259
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw58501100003006
........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711
-

104 rows × 7 columns

-
- - - - -```python -# Add an id column equal to the row index + 1. -# assign() returns a new DataFrame instead of writing into the column selection taken from -# organization, which is what triggered SettingWithCopyWarning. -organization_export = organization_export.assign(id=organization_export.index + 1) -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundrefid
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s3760525011000017031
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf045011000063892
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx873
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt3225011000062594
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw585011000030065
...........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104100
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001101
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691102
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771103
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711104
-

104 rows × 8 columns

-
- - - - -```python -# Export the table as UTF-8 JSON: serialise with pandas, re-parse, then dump with indent=2. -result = organization_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# Excel export -organization_export.to_excel('sample/organization.xlsx', index=False) -``` - - -```python -# CSV export -# NOTE(review): the file is named .tsv but no sep is passed, so to_csv writes comma-separated values; confirm whether sep='\t' is intended. -organization_export.to_csv('sample/organization.tsv', index=False) -``` - -## Table condition_set_term - - -```python -# Display term_orig. -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581
-

48678 rows × 16 columns

-
- - - - -```python -# Display terms_export_dedup. -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsource
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
.......................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 12 columns

-
- - - - -```python -# Attach the id of each term to term_orig with a left join on id_content_hash. -term_orig = term_orig.merge(terms_export_dedup[['id_content_hash', 'id']], how='left', on='id_content_hash') -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingid
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-819461254516881701211
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-1187146317861229577108078565726144083512
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-441061404414724790713
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-49286860933007400714
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-278182176954880296693576676528813711005
......................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-317687377827846095855229849594220095635811320
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320
-

48678 rows × 17 columns

-
- - - - -```python -# Rename the merged id column to term. -term_orig = term_orig.rename({'id': 'term'}, axis='columns') -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingterm
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-819461254516881701211
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-1187146317861229577108078565726144083512
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-441061404414724790713
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-49286860933007400714
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-278182176954880296693576676528813711005
......................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-317687377827846095855229849594220095635811320
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320
-

48678 rows × 17 columns

-
- - - - -```python -# Display condition_type. -condition_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idcondition_issuer
01Journal-only
12Organization-only
23Journal-organization agreement
-
- - - - -```python -# Set the condition type: 1 when the row has no ror, 3 when it has one. -term_orig['condition_type'] = 1 -term_orig.loc[term_orig['ror'].notna(), 'condition_type'] = 3 -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_type
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051
.........................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-3176873778278460958552298495942200956358113203
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-3176873778278460958552298495942200956358113203
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-3176873778278460958552298495942200956358113203
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-3176873778278460958552298495942200956358113203
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-3176873778278460958552298495942200956358113203
-

48678 rows × 18 columns

-
- - - - -```python -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundrefid
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s3760525011000017031
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf045011000063892
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx873
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt3225011000062594
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw585011000030065
...........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104100
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001101
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691102
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771103
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711104
-

104 rows × 8 columns

-
- - - - -```python -# merge des organizations -term_orig = pd.merge(term_orig, organization_export[['ror', 'id']], on='ror', how='left') -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_typeid
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111NaN
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121NaN
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131NaN
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141NaN
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051NaN
............................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581132032.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320335.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320338.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320346.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320343.0
-

48678 rows × 19 columns

-
- - - - -```python -term_orig = term_orig.rename(columns = {'id' : 'organization'}) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_typeorganization
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111NaN
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121NaN
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131NaN
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141NaN
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051NaN
............................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581132032.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320335.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320338.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320346.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320343.0
-

48678 rows × 19 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_terms = term_orig[['term', 'id_content_hash']] -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms.drop_duplicates() -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.loc[condition_set_term_dedup_terms_dedup['term'].notna()] -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(int) -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(str) -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.groupby('id_content_hash').agg({'term': lambda x: ', '.join(x)}) -condition_set_term_dedup_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term
id_content_hash
-9213354388875732238271
-92000707444225583771039
-91717831170231043951175
-91349526464689481631283
-91330136487514062891106
......
91950013304323528931103
9200466168345981543250
9213878808178729253580
921838920891277788238
9219045216097074691919
-

1320 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_journals = term_orig[['journal', 'id_content_hash']] -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals.drop_duplicates() -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.loc[condition_set_term_dedup_journals_dedup['journal'].notna()] -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(int) -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(str) -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.groupby('id_content_hash').agg({'journal': lambda x: ', '.join(x)}) -condition_set_term_dedup_journals_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journal
id_content_hash
-9213354388875732238342, 219, 18, 918, 309, 543, 642, 27, 246, 64,...
-9200070744422558377427
-9171783117023104395548, 240, 298, 132, 3, 516
-9134952646468948163990
-9133013648751406289366
......
9195001330432352893687
9200466168345981543230
9213878808178729253722
9218389208912777882199
9219045216097074691190
-

1320 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_organizations = term_orig[['organization', 'id_content_hash']] -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations.drop_duplicates() -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.loc[condition_set_term_dedup_organizations_dedup['organization'].notna()] -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(int) -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(str) -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.groupby('id_content_hash').agg({'organization': lambda x: ', '.join(x)}) -condition_set_term_dedup_organizations_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
organization
id_content_hash
-921335438887573223875, 76, 77, 78
-920007074442255837747
-913495264646894816348, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-913301364875140628948, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-908512951995045593848, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
......
874525338389352471948, 64, 51, 74, 68, 67, 69, 59
891340129846520381148, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
899944714990810149548, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
919500133043235289348, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
921904521609707469148, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-

277 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id : pas possible pour condition_type -condition_set_term_dedup_condition_types = term_orig[['condition_type', 'id_content_hash']] -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types.drop_duplicates() -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.loc[condition_set_term_dedup_condition_types_dedup['condition_type'].notna()] -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(int) -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(str) -# condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.groupby('id_content_hash').agg({'condition_type': lambda x: ', '.join(x)}) -condition_set_term_dedup_condition_types_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeid_content_hash
01-5068777248818105392
11-1187146317861229577
21-6827815856646016670
315388365857945903435
41-2781821769548802966
.........
334393-681455397323083870
4734436747956201225830719
4736216747956201225830719
4775837687377827846095855
4777617687377827846095855
-

1533 rows × 2 columns

-
- - - - -```python -# recuperation des ids groupés -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_terms_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_journals_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_organizations_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_condition_types_dedup, on='id_content_hash', how='left') -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsourcetermjournalorganizationcondition_type
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...11532, 482, 452, 663, 323, 674, 317, 154, 439, 5...NaN1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...22532, 482, 452, 663, 323, 674, 317, 154, 439, 5...NaN1
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...33532NaN1
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...44532NaN1
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...55498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...NaN1
...................................................
1528NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement13181318942, 854, 933, 297, 130, 144, 549, 283, 512, 1...3, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1529NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191319714, 633, 48, 704, 408, 535, 754, 581, 9793, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1530NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191319714, 633, 48, 704, 408, 535, 754, 581, 9793, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1
1531NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201320866, 171, 186, 839, 5923, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1532NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201320866, 171, 186, 839, 5923, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1
-

1533 rows × 16 columns

-
- - - - -```python -condition_sets_orig = terms_export_dedup[['term', 'condition_type', 'organization', 'journal']] -condition_sets_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
termcondition_typeorganizationjournal
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
231NaN532
341NaN532
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
...............
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592
-

1533 rows × 4 columns

-
- - - - -```python -# ajout d'un hash unique pour chaque variante -condition_sets_orig['id_term_hash'] = condition_sets_orig.apply(lambda x: hash(tuple(x[['condition_type', 'organization', 'journal']])), axis = 1) -condition_sets_orig -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
termcondition_typeorganizationjournalid_term_hash
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-5197283134070040275
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-5197283134070040275
231NaN532-3428409893954144223
341NaN532-3428409893954144223
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...5362274893926121442
..................
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-32115995447722756
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 9794789694892756018439
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 9797722626036678389533
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 5926902392350219571553
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 5924611302665250055299
-

1533 rows × 5 columns

-
- - - - -```python -# grouper les termes qui ont les mêmes valeurs pour le reste -condition_sets_orig_terms = condition_sets_orig[['term', 'id_term_hash']] -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].notna()] -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(int) -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(str) -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.groupby('id_term_hash').agg({'term': lambda x: ', '.join(x)}) -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term
id_term_hash
-9221122160312283608796
-9194263828544732083812
-91929449611264080891246
-9191653994283170820965
-91807822994803644411185
......
9197647807999611822421
9200686802301911565359
92032187412307672131056
9211734360905731286630, 631
9214772761176685077706
-

1149 rows × 1 columns

-
- - - - -```python -# ajout des ids groupées -condition_sets_orig_terms = pd.merge(condition_sets_orig, condition_sets_orig_terms_dedup, on='id_term_hash', how='left') -condition_sets_orig_terms -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term_xcondition_typeorganizationjournalid_term_hashterm_y
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
231NaN532-34284098939541442233, 4
341NaN532-34284098939541442233, 4
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
.....................
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1533 rows × 6 columns

-
- - - - -```python -# rename terms -del condition_sets_orig_terms['term_x'] -condition_sets_orig_terms = condition_sets_orig_terms.rename(columns = {'term_y' : 'term'}) -condition_sets_orig_terms -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
11NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
31NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
..................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1533 rows × 5 columns

-
- - - - -```python -# test duplicates -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].sort_values(by='term') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
11NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
11871NaN779-91040221086658593781001, 1002, 1003
11881NaN779-91040221086658593781001, 1002, 1003
11901NaN7, 22-57959714025828680511004, 1005
11941NaN825-29857252040668413361008, 1009
..................
11611NaN855158530994336307876978, 979
11681NaN654-5164377982436891368984, 985
11791NaN751-1857992192228010123993, 994, 995
11801NaN751-1857992192228010123993, 994, 995
11821NaN531-3353627437951234546996, 997
-

384 rows × 5 columns

-
- - - - -```python -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].shape[0] -``` - - - - - 384 - - - - -```python -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61NaN498-7139474688484852577, 8
81NaN789-53320450395728364569, 10, 11, 12
..................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1149 rows × 5 columns

-
- - - - -```python -# ajout des champs manquants -condition_sets_orig_terms_dedup['comment'] = '' -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['journal'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['condition_type'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['organization'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61NaN498-7139474688484852577, 8
81NaN789-53320450395728364569, 10, 11, 12
.....................
15151NaN87030318528692284251371306, 1307
15171NaN41-79020561546065098061308, 1309
15191NaN8076578672144179594851310, 1311
15211NaN53373038623529842952821312, 1313
15231NaN60865480185615639066771314, 1315
-

661 rows × 6 columns

-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup['organization'] = condition_sets_orig_terms_dedup['organization'].fillna('') -condition_sets_orig_terms_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21532-34284098939541442233, 4
41498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61498-7139474688484852577, 8
81789-53320450395728364569, 10, 11, 12
.....................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1149 rows × 6 columns

-
- - - - -```python -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 43
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 65
31498-7139474688484852577, 87
41789-53320450395728364569, 10, 11, 129
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181529
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191530
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191531
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201532
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201533
-

1149 rows × 7 columns

-
- - - - -```python -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 42
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 63
31498-7139474688484852577, 84
41789-53320450395728364569, 10, 11, 125
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

1149 rows × 7 columns

-
# Columns exported to the condition_set table.
condition_set_columns = ['id', 'condition_type', 'organization', 'journal', 'term', 'comment']
condition_set_export = condition_sets_orig_terms_dedup[condition_set_columns]

# JSON export: the records are re-parsed so that json.dump can pretty-print them.
result = condition_set_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/condition_set.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)

# Excel export
condition_set_export.to_excel('sample/condition_set.xlsx', index=False)

# TSV export: the separator has to be given explicitly, otherwise to_csv
# writes comma-separated values into the .tsv file.
condition_set_export.to_csv('sample/condition_set.tsv', sep='\t', encoding='utf-8', index=False)

# ## Table organization_condition_set

condition_sets_orig_terms_dedup
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 42
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 63
31498-7139474688484852577, 84
41789-53320450395728364569, 10, 11, 125
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

1149 rows × 7 columns

-
# Show the condition sets that are linked to at least one organization
# (the organization field is neither missing nor an empty string).
condition_sets_orig_terms_dedup[condition_sets_orig_terms_dedup['organization'].fillna('') != '']
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
5347789-6118989085408562349136
11347668, 576, 371, 410, 849, 184, 670, 559, 58, 16...70263764888625437962212
12147668, 576, 371, 410, 849, 184, 670, 559, 58, 16...88994974481300366982213
21148, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 78985, 485, 787, 415, 189, 395, 652, 83, 227, 44...35305052837971392764222
22348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 78985, 485, 787, 415, 189, 395, 652, 83, 227, 44...30564024657118466664223
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

488 rows × 7 columns

-
# Build the organization_condition frame.
# Target columns of the final table: id, organization, condition_set,
# valid_from, valid_until.
organization_values = condition_sets_orig_terms_dedup['organization']
has_organization = organization_values.notna() & (organization_values != '')
# Keep only the condition sets linked to an organization, with their terms.
organization_condition = condition_sets_orig_terms_dedup.loc[has_organization, ['id', 'organization', 'term']]
organization_condition
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

488 rows × 3 columns

-
# Extract the term ids: turn the comma-separated 'term' field into a list and
# produce one row per term.
term_lists = organization_condition['term'].str.split(',')
organization_condition_split = organization_condition.assign(term=term_lists).explode('term')
organization_condition_split
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

490 rows × 3 columns

-
# Sanity check: rows without an organization (expected to be empty).
organization_condition_split[organization_condition_split['organization'].isna()]
- - - - - - - - - - - - -
idorganizationterm
-
# Sanity check: rows without a term (expected to be empty).
organization_condition_split[organization_condition_split['term'].isna()]
- - - - - - - - - - - - -
idorganizationterm
-
# The term ids are still strings after the split: cast them to integers so
# that they can be used as a merge key.
organization_condition_split = organization_condition_split.astype({'term': int})
organization_condition_split
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

490 rows × 3 columns

-
# Display terms_export_dates, the table that carries the ROR and the
# valid_from / valid_until dates of each term.
terms_export_dates
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_untilterm
0-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-311316
1-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-311316
2-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-311316
3-6020029623494903364https://ror.org/02s3760522020-01-012023-12-311316
4-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-311316
..................
400787687377827846095855https://ror.org/01swzsf042021-01-012023-12-311320
400797687377827846095855https://ror.org/019whta542021-01-012023-12-311320
400807687377827846095855https://ror.org/00vasag412021-01-012023-12-311320
400817687377827846095855https://ror.org/05r0ap6202021-01-012023-12-311320
400827687377827846095855https://ror.org/05pmsvm272021-01-012023-12-311320
-

40083 rows × 5 columns

-
# Merge to get the validity dates of each term.
# terms_export_dates holds one row per (term, ror): reduce it to the distinct
# (term, valid_from, valid_until) combinations first, otherwise the merge
# repeats every condition-set row once per organization.
term_dates = terms_export_dates[['term', 'valid_from', 'valid_until']].drop_duplicates()
organization_condition_split = pd.merge(organization_condition_split, term_dates, on='term', how='left')
organization_condition_split
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
4861011493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861111493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861211493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861311493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861411493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

48615 rows × 5 columns

-
# De-duplication: keep the first occurrence of every fully identical row.
is_repeated = organization_condition_split.duplicated()
organization_condition_split_dedup = organization_condition_split.loc[~is_repeated]
organization_condition_split_dedup
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
3204211453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
4594711463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
4636111473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
4677511483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4769511493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
# Attach the validity dates to each condition set.
# A condition set with several terms has several rows in the split table:
# keep one row per distinct (id, valid_from, valid_until) so that the merge
# does not duplicate the condition set.
condition_set_dates = organization_condition_split_dedup[['id', 'valid_from', 'valid_until']].drop_duplicates()
organization_condition = pd.merge(organization_condition, condition_set_dates, on='id', how='left')
organization_condition
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
48511453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
48611463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48711473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48811483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
48911493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
# The id of the condition set becomes the 'condition_set' foreign key, and
# missing validity dates are exported as empty strings.
organization_condition = (
    organization_condition
    .rename(columns={'id': 'condition_set'})
    .fillna({'valid_from': '', 'valid_until': ''})
)
organization_condition
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_until
064713
1124722
2134722
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
..................
48511453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
48611463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48711473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48811483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
48911493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
# Final split to get one row per organization.
# The ids are stored as "48, 64, 51": splitting on ',' alone leaves a leading
# space on every id but the first, so the pieces are stripped afterwards.
organization_condition_fin = (
    organization_condition
    .assign(organization=organization_condition['organization'].str.split(','))
    .explode('organization')
    .assign(organization=lambda frame: frame['organization'].str.strip())
)
organization_condition_fin
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_until
064713
1124722
2134722
3224842
3226442
..................
4891149213202021-01-012023-12-31
48911493513202021-01-012023-12-31
48911493813202021-01-012023-12-31
48911494613202021-01-012023-12-31
48911494313202021-01-012023-12-31
-

6834 rows × 5 columns

-
# Add the id: renumber the rows (the exploded frame repeats index values) and
# use the 1-based row position as id.
organization_condition_fin = organization_condition_fin.reset_index(drop=True)
organization_condition_fin['id'] = range(1, len(organization_condition_fin) + 1)
organization_condition_fin
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_untilid
0647131
11247222
21347223
32248424
42264425
.....................
68291149213202021-01-012023-12-316830
683011493513202021-01-012023-12-316831
683111493813202021-01-012023-12-316832
683211494613202021-01-012023-12-316833
683311494313202021-01-012023-12-316834
-

6834 rows × 6 columns

-
# Columns exported to the organization_condition table.
organization_condition_columns = ['id', 'condition_set', 'organization', 'valid_from', 'valid_until']
organization_condition_export = organization_condition_fin[organization_condition_columns]

# JSON export: the records are re-parsed so that json.dump can pretty-print them.
result = organization_condition_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/organization_condition.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)

# Excel export
organization_condition_export.to_excel('sample/organization_condition.xlsx', index=False)

# TSV export: the separator has to be given explicitly, otherwise to_csv
# writes comma-separated values into the .tsv file.
organization_condition_export.to_csv('sample/organization_condition.tsv', sep='\t', encoding='utf-8', index=False)

# ## Table journal_condition_set

# Build the journal_condition frame.
# Target columns of the final table: id, journal, condition_set, valid_from,
# valid_until.
# Keep the condition sets linked to at least one journal (field neither
# missing nor empty).
journal_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['journal'].notna()) & (condition_sets_orig_terms_dedup['journal'] != '')][['id', 'journal']]
journal_condition
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idjournal
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
12532
23498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
34498
45789
.........
11441145942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
11451146714, 633, 48, 704, 408, 535, 754, 581, 979
11461147714, 633, 48, 704, 408, 535, 754, 581, 979
11471148866, 171, 186, 839, 592
11481149866, 171, 186, 839, 592
-

1149 rows × 2 columns

-
# The id of the condition set becomes the 'condition_set' foreign key; the
# validity dates are left empty for journals.
journal_condition = (
    journal_condition
    .rename(columns={'id': 'condition_set'})
    .assign(valid_from='', valid_until='')
)
journal_condition
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_until
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
12532
23498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
34498
45789
...............
11441145942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
11451146714, 633, 48, 704, 408, 535, 754, 581, 979
11461147714, 633, 48, 704, 408, 535, 754, 581, 979
11471148866, 171, 186, 839, 592
11481149866, 171, 186, 839, 592
-

1149 rows × 4 columns

-
# Final split to get one row per journal.
# The ids are stored as "532, 482, 452": splitting on ',' alone leaves a
# leading space on every id but the first, so the pieces are stripped
# afterwards.
journal_condition_fin = (
    journal_condition
    .assign(journal=journal_condition['journal'].str.split(','))
    .explode('journal')
    .assign(journal=lambda frame: frame['journal'].str.strip())
)
journal_condition_fin
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_until
01532
01482
01452
01663
01323
...............
11481149866
11481149171
11481149186
11481149839
11481149592
-

3033 rows × 4 columns

-
# Add the id: renumber the rows (the exploded frame repeats index values) and
# use the 1-based row position as id.
journal_condition_fin = journal_condition_fin.reset_index(drop=True)
journal_condition_fin['id'] = range(1, len(journal_condition_fin) + 1)
journal_condition_fin
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_untilid
015321
114822
214523
316634
413235
..................
302811498663029
302911491713030
303011491863031
303111498393032
303211495923033
-

3033 rows × 5 columns

-
- - - - -```python -# export de la table -result = journal_condition_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export excel -journal_condition_fin.to_excel('sample/journal_condition.xlsx', index=False) -``` - - -```python -# export csv -journal_condition_fin.to_csv('sample/journal_condition.tsv', index=False) -``` - - -```python - -``` diff --git a/import_scripts/10_oacct_terms.py b/import_scripts/10_oacct_terms.py deleted file mode 100644 index bb7de41f..00000000 --- a/import_scripts/10_oacct_terms.py +++ /dev/null @@ -1,1975 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. 
#
# Author: **Pablo Iriarte**, University of Geneva (pablo.iriarte@unige.ch)
# Last updated: 08.09.2021

# In[1]:

import csv
import json
import os

import numpy as np
import pandas as pd

# show every column when a frame is displayed
pd.set_option('display.max_columns', None)
# first value used for the ids
id_start = 1


# ## Import of the file extracted from Sherpa

# In[2]:

sherpa = pd.read_csv('sample/sherpa_policies_brut.tsv', encoding='utf-8', header=0, sep='\t')
sherpa


# In[3]:

# check the values found for the article versions
sherpa['article_version'].value_counts()


# In[4]:

# check the rows without an ISSN
sherpa.loc[sherpa['issn'].isna()]


# In[5]:

# load the ISSN -> ISSN-L table
issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t')
issns


# In[6]:

# rename the columns
issns = issns.rename(columns={'ISSN': 'issn', 'ISSN-L': 'issnl'})
issns


# In[7]:

# add the ISSN-L to the Sherpa table
sherpa = pd.merge(sherpa, issns, on='issn', how='left')
sherpa


# In[8]:

# check the rows without an ISSN-L
sherpa.loc[sherpa['issnl'].isna()]


# In[9]:

# extract of the IR archiving + embargo data by ISSN: only the ISSN-L column
# is selected so far
sherpa_ir = sherpa[['issnl', ]]


# ## Import of the Read & Publish licences file

# In[10]:

rp = pd.read_csv('sample/read_publish_brut_merge.tsv', encoding='utf-8', header=0, sep='\t')
rp


# In[11]:

rp['embargo_months'].value_counts()


# In[12]:

# gather the publisher in a single field: each publisher has its own column,
# flagged with 'x' on the rows that belong to it (a later publisher in the
# list overrides an earlier one when a row is flagged twice)
# rp.loc[rp['Elsevier'] == 'x', 'public_notes'] = 'Elsevier Read & Publish agreement'
for publisher in ('Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP'):
    rp.loc[rp[publisher] == 'x', 'rp_publisher'] = publisher
rp


# In[13]:

# check the values found for the publishers
rp['rp_publisher'].value_counts()


# In[14]:

# check the values found for the licences
rp['license'].value_counts()


# In[15]:

# drop the columns that are no longer needed (per-publisher flags and URL)
rp = rp.drop(columns=['Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP', 'URL'])
rp


# In[16]:

# rename the columns
rp = rp.rename(columns={'Title': 'title', 'ROR': 'ror', 'read_publish_id': 'rp_id'})
rp


# ## Table applicable_version

# In[17]:

# build the frame: 3 values (submitted, accepted, published)
col_names = ['id',
             'type',
             'description'
            ]
version_rows = [
    {'id': 1, 'type': 'submitted', 'description': 'Submitted version'},
    {'id': 2, 'type': 'accepted', 'description': 'Accepted version'},
    {'id': 3, 'type': 'published', 'description': 'Published version'},
]
# DataFrame.append was removed in pandas 2.0, so the rows are passed to the
# constructor. dtype=object matches what the row-by-row append produced and
# keeps the ids integers when a later left merge finds no match.
applicable_version = pd.DataFrame(version_rows, columns=col_names, dtype=object)
applicable_version


# In[18]:

# add the UNKNOWN value
unknown_version = pd.DataFrame(
    [{'id': 999999, 'type': 'UNKNOWN', 'description': 'UNKNOWN'}], dtype=object)
applicable_version = pd.concat([applicable_version, unknown_version], ignore_index=True)
applicable_version


# In[19]:

# keep the fields of the final table
applicable_version_export = applicable_version[['id', 'description']]


# In[20]:

# export of the applicable_version table (re-parsed so json.dump can
# pretty-print it)
result = applicable_version_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/version.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)


# In[21]:

# TSV export
applicable_version_export.to_csv('sample/version.tsv', sep='\t', encoding='utf-8', index=False)


# In[22]:

# Excel export
applicable_version_export.to_excel('sample/version.xlsx', index=False)


# In[23]:

# add the version id to the Sherpa table
sherpa = pd.merge(sherpa, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left')
sherpa


# In[24]:

# the merge suffixed both id columns: id_x is the Sherpa id, id_y the version id
sherpa = sherpa.rename(columns={'id_x': 'id', 'id_y': 'version'})
del sherpa['type']
sherpa


# In[25]:

# add the version id to the Read & Publish table
rp = pd.merge(rp, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left')
rp


# In[26]:

rp = rp.rename(columns={'id': 'version'})
del rp['type']
rp


# ## Table oa_licence

# In[27]:

# columns of the licence table
# 'version' is not used: licences are de-duplicated by name, without version
col_names = ['id',
             'name',
             'url'
            ]


# In[28]:

# licences found in Sherpa
sherpa['license'].value_counts()


# In[29]:

sherpa_licences = sherpa['license'].drop_duplicates()
sherpa_licences = sherpa_licences.dropna()
sherpa_licences


# In[31]:

# one row per distinct Sherpa licence code. The frame is built in one go:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copied the whole frame at every iteration.
for code in sherpa_licences:
    print(code)
oa_licence = pd.DataFrame({'sherpa_code': list(sherpa_licences)})


# In[33]:

# the id is the 1-based row position
oa_licence['id'] = oa_licence.index + 1


# In[34]:

# name and URL of each Sherpa licence code; codes missing from this table
# keep an empty (NaN) name and URL
# NOTE(review): the GNU GPL URL is kept as found in the original script -
# confirm that it is the intended target.
licence_info = {
    'cc_by': ('CC BY', 'https://creativecommons.org/licenses/by/4.0/'),
    'cc_by_sa': ('CC BY-SA', 'https://creativecommons.org/licenses/by-sa/4.0/'),
    'cc_by_nc': ('CC BY-NC', 'https://creativecommons.org/licenses/by-nc/4.0/'),
    'cc_by_nc_sa': ('CC BY-NC-SA', 'https://creativecommons.org/licenses/by-nc-sa/4.0/'),
    'cc_by_nd': ('CC BY-ND', 'https://creativecommons.org/licenses/by-nd/4.0/'),
    'cc_by_nc_nd': ('CC BY-NC-ND', 'https://creativecommons.org/licenses/by-nc-nd/4.0/'),
    'cc0': ('CC0', 'https://creativecommons.org/publicdomain/zero/1.0/'),
    'bespoke_license': ('Specific license', ''),
    'all_rights_reserved': ('All rights reserved', ''),
    'cc_gnu_gpl': ('GNU GPL', 'http://gnugpl.org/'),
    'cc_public_domain': ('Public domain', 'https://creativecommons.org/share-your-work/public-domain/'),
}
# oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = 'https://port.sas.ac.uk/mod/book/view.php?id=1340&chapterid=1003'
oa_licence['name'] = oa_licence['sherpa_code'].map(
    {code: name for code, (name, _) in licence_info.items()})
oa_licence['url'] = oa_licence['sherpa_code'].map(
    {code: url for code, (_, url) in licence_info.items()})
# same column order as before: id, name, url, sherpa_code
oa_licence = oa_licence[col_names + ['sherpa_code']]
oa_licence


# In[35]:

# add the UNKNOWN value (object dtype, as a row-wise append produced, so the
# ids stay integers when a later left merge finds no match)
unknown_licence = pd.DataFrame(
    [{'id': 999999, 'sherpa_code': '___', 'name': 'UNKNOWN', 'url': ''}], dtype=object)
oa_licence = pd.concat([oa_licence, unknown_licence], ignore_index=True)
oa_licence


# In[36]:

# use the same licence key name in the Sherpa table as in oa_licence
sherpa = sherpa.rename(columns={'license': 'sherpa_code'})
sherpa


# In[37]:

# same for the Read & Publish table
rp = rp.rename(columns={'license': 'sherpa_code'})
rp


# In[38]:

# add the licence id to the Sherpa table
sherpa = pd.merge(sherpa, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left')
sherpa


# In[39]:

# the merge suffixed both id columns: id_x is the Sherpa id, id_y the licence id
sherpa = sherpa.rename(columns={'id_x': 'id', 'id_y': 'licence'})
sherpa


# In[40]:

# add the licence id to the Read & Publish table
rp = pd.merge(rp, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left')
rp


# In[41]:

rp = rp.rename(columns={'id': 'licence'})
rp


# In[42]:

# keep and rename the fields of the final table
oa_licence_export = oa_licence[['id', 'name', 'url']]
oa_licence_export = oa_licence_export.rename(columns={'name': 'name_or_abbrev', 'url': 'website'})


# In[43]:

# export of the oa_licence table (re-parsed so json.dump can pretty-print it)
result = oa_licence_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/licence.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)


# In[44]:

# TSV export
oa_licence_export.to_csv('sample/licence.tsv', sep='\t', encoding='utf-8', index=False)


# In[45]:

# Excel export
oa_licence_export.to_excel('sample/licence.xlsx', index=False)


# ## Table cost_factor_type

# In[46]:

# build the frame. DataFrame.append was removed in pandas 2.0, so the rows
# are passed to the constructor; dtype=object matches what the row-by-row
# append produced.
col_names = ['id',
             'name'
            ]
cost_factor_type = pd.DataFrame(
    [
        {'id': 1, 'name': 'APC'},
        {'id': 2, 'name': 'Discount'},
        {'id': 3, 'name': 'Refund'},
    ],
    columns=col_names,
    dtype=object,
)
cost_factor_type


# In[47]:

# add the UNKNOWN value
unknown_cost_factor_type = pd.DataFrame([{'id': 999999, 'name': 'UNKNOWN'}], dtype=object)
cost_factor_type = pd.concat([cost_factor_type, unknown_cost_factor_type], ignore_index=True)
cost_factor_type


# In[48]:

# export of the table (re-parsed so json.dump can pretty-print it)
result = cost_factor_type.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/cost_factor_type.json', 'w', encoding='utf-8') as file:
    json.dump(parsed, file, indent=2, ensure_ascii=False)


# In[49]:

# TSV export
cost_factor_type.to_csv('sample/cost_factor_type.tsv', sep='\t', encoding='utf-8', index=False)


# In[50]:

# Excel export
cost_factor_type.to_excel('sample/cost_factor_type.xlsx', index=False)


# ## Table cost_factor

# ### APC data from DOAJ

# In[51]:

# load the DOAJ journal list
doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0)
doaj


# In[52]:

# keep the journals that charge an APC
doaj_apc = doaj.loc[doaj['APC'] == 'Yes'][['Journal ISSN (print version)', 'Journal EISSN (online version)', 'APC amount']]
doaj_apc


# In[53]:

# keep the journals without APC; copy() because a column is added below and
# the selection must not be a view on doaj
doaj_apc_no = doaj.loc[doaj['APC'] == 'No'][['Journal ISSN (print version)', 'Journal EISSN (online version)']].copy()
doaj_apc_no


# In[54]:

# their APC amount is 0
doaj_apc_no['APC amount'] = 0
doaj_apc_no


# In[55]:

# add them to the APC table
doaj_apc = pd.concat([doaj_apc, doaj_apc_no], ignore_index=True)
doaj_apc


# In[56]:

# split the price at the first space into 'amount' and 'symbol'
doaj_apc[['amount', 'symbol']] = doaj_apc['APC amount'].str.split(' ', n=1, expand=True)
doaj_apc


# In[57]:

# the numeric 0 of the journals without APC cannot be split as a string:
# set their amount and symbol explicitly
doaj_apc.loc[doaj_apc['APC amount'] == 0, 'amount'] = 0
doaj_apc.loc[doaj_apc['APC amount'] == 0, 'symbol'] = ''
doaj_apc


# In[58]:

# add the missing fields (cost_factor_type 1 is 'APC')
doaj_apc['cost_factor_type'] = 1
doaj_apc['comment'] = 'Source: DOAJ'
doaj_apc


# In[59]:

# rename the fields
doaj_apc = doaj_apc.rename(columns={'Journal ISSN (print version)': 'issn_print', 'Journal EISSN (online version)': 'issn_electronic'})
doaj_apc


# In[60]:

# the ISSN is the electronic one by default
doaj_apc['issn'] = doaj_apc['issn_electronic']
doaj_apc


# In[61]:

doaj_apc.loc[doaj_apc['issn'].isna()]


# In[62]:

# fall back on the print ISSN when the electronic one is missing
doaj_apc.loc[doaj_apc['issn'].isna(), 'issn'] = doaj_apc['issn_print']
doaj_apc.loc[doaj_apc['issn'].isna()]


# In[63]:

doaj_apc = pd.merge(doaj_apc, issns, on='issn', how='left')
doaj_apc


# In[64]:

# rename the columns
doaj_apc = doaj_apc.rename(columns={'issnl': 'issn_link'})
doaj_apc


# ### APCs from the Journal Database (Zurich Open Repository and Archive)
#
# https://www.jdb.uzh.ch/

# In[65]:

# JDB, the Zurich database
jdb = pd.read_csv('zora/jdb_apcs.tsv', encoding='utf-8', header=0, sep='\t')
jdb


# In[66]:

# rename the id
jdb = jdb.rename(columns={'id': 'jdb_id'})
jdb


# In[67]:

# add the missing fields (cost_factor_type 1 is 'APC')
jdb['cost_factor_type'] = 1
jdb['comment'] = 'Source: JDB (' + jdb['apc_date'].astype(str) + ')'
jdb


# In[68]:

# rename the fields
jdb = jdb.rename(columns={'apc_fee': 'amount', 'apc_currency': 'symbol'})
jdb


# In[69]:

# keep the last row of each JDB journal
jdb = jdb.drop_duplicates(subset='jdb_id', keep='last')


# In[70]:

# load OpenAPC with the maximum values
openapc = pd.read_csv('openapc/open_apc_max.tsv', encoding='utf-8', header=0, sep='\t')
openapc


# In[71]:

# rename the fields
openapc = openapc.rename(columns={'period': 'apc_date', 'issn_l': 'issn_link', 'euro': 'amount'})
openapc


# In[72]:

# add the type, the currency symbol and the source comment
openapc['cost_factor_type'] = 1
openapc['jdb_id'] = np.nan
openapc['symbol'] = 'EUR'
openapc['comment'] = 'Source: OpenAPC (' + openapc['apc_date'].astype(str) + ')'
openapc


# In[73]:

# add the OpenAPC rows after the JDB rows
jdb = pd.concat([jdb, openapc], ignore_index=True)
jdb


# In[74]:

# drop the duplicates by ISSN-L and date; JDB rows come first, so they win
jdb = jdb.drop_duplicates(subset=['issn_link', 'apc_date'], keep='first')
jdb


# In[75]:

# DOAJ rows followed by the JDB / OpenAPC rows
cost_factor = pd.concat([doaj_apc, jdb], ignore_index=True)
cost_factor


# In[76]:

# check the rows without issn_link
cost_factor.loc[cost_factor['issn_link'].isna()]


# In[77]:

# merge with the ISSN-L table
cost_factor = pd.merge(cost_factor, issns, on='issn', how='left')
cost_factor


# In[78]:

# check the rows without issnl
cost_factor.loc[cost_factor['issnl'].isna()]


# In[79]:

# fill the missing ISSN from the other identifiers, in order of preference
for fallback in ('issn_print', 'issn_electronic', 'issn_link'):
    cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor[fallback]
cost_factor.loc[cost_factor['issn'].isna()]


# In[80]:

# fill the missing ISSN-L from the other identifiers, in order of preference
for fallback in ('issn_link', 'issn_print', 'issn_electronic', 'issn'):
    cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor[fallback]
cost_factor.loc[cost_factor['issnl'].isna()]


# In[81]:

# keep the identifiers and values needed for the merge
cost_factor_ids = cost_factor[['issn', 'issnl', 'cost_factor_type', 'amount', 'symbol', 'comment']]
# cost_factor_ids_1 = cost_factor_ids_1.rename(columns = {'issn_link' : 'issn'})
# cost_factor_ids_2 = cost_factor.loc[cost_factor['issn_electronic'].notna()][['issn_electronic', 'cost_factor_type', 'amount', 'symbol', 'comment']]
# cost_factor_ids_2 = cost_factor_ids_2.rename(columns = {'issn_electronic' : 'issn'})
# cost_factor_ids_3 = cost_factor.loc[cost_factor['issn_print'].notna()][['issn_print', 'cost_factor_type', 'amount', 'symbol', 'comment']]
# cost_factor_ids_3 = cost_factor_ids_3.rename(columns = {'issn_print' : 'issn'})
# cost_factor_ids_4 = cost_factor.loc[cost_factor['issn'].notna()][['issn', 'cost_factor_type', 'amount', 'symbol', 'comment']]
# cost_factor_ids = cost_factor_ids_1.append(cost_factor_ids_2)
# cost_factor_ids = 
cost_factor_ids.append(cost_factor_ids_3) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_4) -cost_factor_ids - - -# In[82]: - - -# supprimer les doublons et les vides -cost_factor_ids = cost_factor_ids.drop_duplicates(subset=['issnl']) -cost_factor_ids - - -# In[83]: - - -# merge dans l'autre sens pour garder que les lignes du fichier -cost_factor_ids = pd.merge(cost_factor_ids, sherpa[['id', 'issnl']], on='issnl', how='left') -cost_factor_ids - - -# In[84]: - - -# garder les lignes avec merge -cost_factor_ids_all = cost_factor_ids.loc[cost_factor_ids['id'].notnull()] -cost_factor_ids_all - - -# In[85]: - - -# supprimer les doublons -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['id']) -cost_factor_ids_all - - -# In[86]: - - -# supprimer les doublons par issnl -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['issnl']) -del cost_factor_ids_all['id'] -cost_factor_ids_all - - -# In[87]: - - -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -cost_factor_ids_all - - -# In[88]: - - -# merge avec la table sherpa -sherpa = pd.merge(sherpa, cost_factor_ids_all[['issnl', 'cost_factor']], on='issnl', how='left') -sherpa - - -# In[89]: - - -sherpa.loc[sherpa['cost_factor'].isna()] - - -# In[90]: - - -# garder les APCs pour la version published -sherpa.loc[sherpa['article_version'] != 'published', 'cost_factor'] = np.nan -sherpa.loc[sherpa['cost_factor'].notna()] - - -# In[91]: - - -# renommer l'id du fichier sherpa brut -# cost_factor_ids_all = cost_factor_ids_all.rename(columns = {'id' : 'id_sherpa'}) -cost_factor_ids_all = 
cost_factor_ids_all.rename(columns = {'cost_factor' : 'id'}) -cost_factor_ids_all - - -# In[92]: - - -cost_factor_ids_all['id'] = cost_factor_ids_all['id'].astype(int) - - -# In[93]: - - -cost_factor_ids_all - - -# In[94]: - - -cost_factor_export = cost_factor_ids_all[['id', 'cost_factor_type', 'amount', 'symbol', 'comment']] -cost_factor_export - - -# In[95]: - - -cost_factor_export.shape[0] - - -# In[96]: - - -# ajout de la valeur Rabais 100% pour les licences Read & Publish -rpid = cost_factor_export.shape[0] + 1 -cost_factor_export = cost_factor_export.append({'id' : rpid, 'cost_factor_type' : 2, 'amount' : 100, 'symbol' : '%', 'comment' : 'Read & Publish agreement'}, ignore_index=True) -cost_factor_export - - -# In[97]: - - -# ajout de l'id dans la table read & publish -rp['cost_factor'] = rpid -rp - - -# In[98]: - - -# ajout de la valeur UNKNOWN -cost_factor_export = cost_factor_export.append({'id' : 999999, 'cost_factor_type' : 999999, 'amount' : 0, 'symbol' : '', 'comment' : 'UNKNOWN'}, ignore_index=True) -cost_factor_export - - -# In[99]: - - -# export de la table -result = cost_factor_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/cost_factor.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[100]: - - -# export csv -cost_factor_export.to_csv('sample/cost_factor.tsv', index=False) - - -# In[101]: - - -# export excel -cost_factor_export.to_excel('sample/cost_factor.xlsx', index=False) - - -# ## Table term - -# In[102]: - - -sherpa - - -# In[103]: - - -# col_names = ['id', 'applicable_version', 'cost_factor', 'embargo', 'archiving'] -term_sherpa = sherpa[['id', 'version', 'cost_factor', 'embargo', 'archiving', 'locations_ir', 'locations_not_ir', 'licence', 'journal', 'conditions', 'public_notes', 'prerequisite_funders', 'prerequisite_funders_ror']] -term_sherpa - - -# In[104]: - - -# renommer les champs -term_sherpa = term_sherpa.rename(columns = {'id' 
: 'id_sherpa', 'embargo' : 'embargo_months', 'prerequisite_funders_ror' : 'ror'}) -term_sherpa - - -# In[105]: - - -# merge des champs dans le comment : conditions, public_notes, locations_not_ir -term_sherpa['conditions'] = term_sherpa['conditions'].fillna('') -term_sherpa['public_notes'] = term_sherpa['public_notes'].fillna('') -term_sherpa['locations_not_ir'] = term_sherpa['locations_not_ir'].fillna('') -term_sherpa['locations_ir'] = term_sherpa['locations_ir'].fillna('') -term_sherpa.loc[term_sherpa['locations_not_ir'] != '', 'locations_not_ir'] = 'Non institutional archiving locations: ' + term_sherpa['locations_not_ir'] -term_sherpa.loc[term_sherpa['locations_ir'] != '', 'locations_ir'] = 'Institutional archiving locations: ' + term_sherpa['locations_ir'] -term_sherpa.loc[term_sherpa['archiving'] == False, 'comment'] = term_sherpa['locations_not_ir'] -term_sherpa.loc[term_sherpa['archiving'] == True, 'comment'] = term_sherpa['locations_ir'] -term_sherpa.loc[term_sherpa['comment'] == '', 'comment'] = 'Conditions: ' + term_sherpa['conditions'] -term_sherpa.loc[(term_sherpa['comment'] != '') & (term_sherpa['conditions'] != ''), 'comment'] = term_sherpa['comment'] + ' ; Conditions: ' + term_sherpa['conditions'] -term_sherpa.loc[(term_sherpa['public_notes'] != '') & (term_sherpa['public_notes'] != term_sherpa['comment']), 'comment'] = term_sherpa['comment'] + ' ; Public notes: ' + term_sherpa['public_notes'] -term_sherpa.loc[(term_sherpa['public_notes'] != '') & (term_sherpa['comment'] == ''), 'comment'] = 'Public notes: ' + term_sherpa['public_notes'] -term_sherpa - - -# In[106]: - - -term_sherpa['prerequisite_funders'].value_counts() - - -# In[107]: - - -rp - - -# In[108]: - - -term_rp = rp[['rp_id', 'version', 'archiving', 'embargo_months', 'cost_factor', 'licence', 'journal', 'rp_publisher', 'ror', 'valid_from', 'valid_until']] -term_rp - - -# In[109]: - - -term_rp['rp_publisher'].value_counts() - - -# In[110]: - - -term_rp.loc[term_rp['rp_publisher'] == 
'Elsevier', 'comment'] = 'Elsevier Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'Wiley', 'comment'] = 'Wiley Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'TF', 'comment'] = 'Taylor and Francis Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'Springer Nature ', 'comment'] = 'Springer Nature Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'CUP', 'comment'] = 'Cambridge University Press (CUP) Read & Publish agreement. Article types covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports' -del term_rp['rp_publisher'] -term_rp - - -# In[111]: - - -# cocnat de deux tables -term_orig = term_sherpa[['id_sherpa', 'version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'journal', 'prerequisite_funders', 'ror', 'comment']] -term_orig - - -# In[112]: - - -term_orig = term_orig.append(term_rp, ignore_index=True, sort=False) -term_orig - - -# In[113]: - - -# ajout d'un hash unique pour chaque variante -term_orig['id_content_hash'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'comment']])), axis = 1) -term_orig['id_content_hash_licence'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'comment']])), axis = 1) - - -# In[114]: - - -term_orig.sort_values(by='id_content_hash') - - -# In[115]: - - -# doublons -term_orig.loc[term_orig.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[116]: - - -term_orig['licence'] = term_orig['licence'].fillna(999999) -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].fillna(999999) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -# term_orig['embargo_months'] = term_orig['embargo_months'].fillna(0) -# term_orig['embargo_months'] = term_orig['embargo_months'].astype(int) 
-term_orig.loc[term_orig['archiving'] == True, 'ir_archiving'] = 1 -term_orig.loc[term_orig['archiving'] == False, 'ir_archiving'] = 0 -term_orig['ir_archiving'] = term_orig['ir_archiving'].fillna(0) -term_orig - - -# In[117]: - - -term_orig.loc[term_orig['ir_archiving'].isna()] - - -# In[118]: - - -term_orig['ir_archiving'].value_counts() - - -# In[119]: - - -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['ir_archiving'] = term_orig['ir_archiving'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -term_orig - - -# In[120]: - - -terms_export_dates = term_orig.loc[(term_orig['valid_from'].notna()) | (term_orig['valid_until'].notna())][['id_content_hash', 'ror', 'valid_from', 'valid_until']] -terms_export_dates - - -# In[121]: - - -terms_export = term_orig[['id_sherpa', 'rp_id', 'id_content_hash', 'id_content_hash_licence', 'version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment']] -terms_export - - -# In[122]: - - -# test de doublons -terms_export.loc[terms_export.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[123]: - - -terms_export_dedup = terms_export.drop_duplicates(subset=['id_content_hash']) -terms_export_dedup - - -# In[124]: - - -terms_export_dedup_licence = terms_export.drop_duplicates(subset=['id_content_hash_licence']) -terms_export_dedup_licence - - -# In[125]: - - -# test de doublons -terms_export_dedup_licence.loc[terms_export_dedup_licence.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[126]: - - -# totaux pour les deux sources -terms_export_dedup.loc[terms_export_dedup['id_sherpa'].notna()].shape[0] - - -# In[127]: - - -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()].shape[0] - - -# In[128]: - - -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()] - - -# In[129]: - - -# convertir l'index en id -terms_export_dedup.reset_index(inplace=True) -del terms_export_dedup['index'] -terms_export_dedup - 
- -# In[130]: - - -# ajout de l'id avec l'index + 1 -terms_export_dedup['id'] = terms_export_dedup.index + 1 -# del terms_export_dedup['index'] -terms_export_dedup - - -# In[131]: - - -terms_export_dedup['source'] = '' -terms_export_dedup - - -# In[132]: - - -# grouper par licence -terms_export_dedup_licences = terms_export_dedup_licence[['licence', 'id_content_hash']] -terms_export_dedup_licences - - -# In[133]: - - -# concat valeurs avec même id -terms_export_dedup_licences['licence'] = terms_export_dedup_licences['licence'].astype(str) -terms_export_dedup_licences = terms_export_dedup_licences.groupby('id_content_hash').agg({'licence': lambda x: ', '.join(x)}) -terms_export_dedup_licences - - -# In[134]: - - -# test des valeur multiples -terms_export_dedup_licences.loc[terms_export_dedup_licences['licence'].str.contains(',')] - - -# In[135]: - - -# ajout des licences groupées -terms_export_dedup_fin = pd.merge(terms_export_dedup, terms_export_dedup_licences, on='id_content_hash', how='left') -terms_export_dedup_fin - - -# In[136]: - - -# merge avec les dates pour avoir les terms ids -terms_export_dates = pd.merge(terms_export_dates, terms_export_dedup_fin[['id_content_hash', 'id']], on='id_content_hash') -terms_export_dates = terms_export_dates.rename(columns = {'id' : 'term'}) -terms_export_dates - - -# In[137]: - - -# renommer les champs de licence -del terms_export_dedup_fin['licence_x'] -terms_export_dedup_fin = terms_export_dedup_fin.rename(columns = {'licence_y' : 'licence'}) - - -# In[138]: - - -terms_export_fin = terms_export_dedup_fin[['version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment', 'id', 'source']] -terms_export_fin - - -# In[139]: - - -# export de la table -result = terms_export_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/term.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[140]: - - -# export csv 
-terms_export_fin.to_csv('sample/term.tsv', index=False) - - -# In[141]: - - -# export excel -terms_export_fin.to_excel('sample/term.xlsx', index=False) - - -# ## Table condition_type - -# In[142]: - - -# Journal-only, Organization-only, Journal-organization agreement -col_names = ['id', - 'condition_issuer' - ] -condition_type = pd.DataFrame(columns = col_names) -condition_type = condition_type.append({'id' : 1, 'condition_issuer' : 'Journal-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 2, 'condition_issuer' : 'Organization-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 3, 'condition_issuer' : 'Journal-organization agreement'}, ignore_index=True) -condition_type - - -# In[143]: - - -# export de la table -result = condition_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[144]: - - -# export csv -condition_type.to_csv('sample/condition_type.tsv', index=False) - - -# In[145]: - - -# export excel -condition_type.to_excel('sample/condition_type.xlsx', index=False) - - -# ## Table organization - -# In[146]: - - -# extraction des organizations (funders) -sherpa - - -# In[147]: - - -sherpa.loc[sherpa['prerequisite_funders'].notna()] - - -# In[148]: - - -sherpa['prerequisite_funders'].value_counts() - - -# In[149]: - - -funders = sherpa.loc[sherpa['prerequisite_funders'].notna()][['prerequisite_funders_name', 'prerequisite_funders_fundref', 'prerequisite_funders_ror', 'prerequisite_funders_country', 'prerequisite_funders_url', 'prerequisite_funders_sherpa_id']] -funders - - -# In[150]: - - -funders_dedup = funders.drop_duplicates(subset='prerequisite_funders_ror') -funders_dedup - - -# In[151]: - - -funders_dedup.shape[0] - - -# In[152]: - - -# export excel -funders_dedup.to_excel('sample/funders.xlsx', index=False) - - -# In[153]: - - -# export csv 
-funders_dedup.to_csv('sample/funders.tsv', index=False) - - -# In[154]: - - -# creation du DF -organization_funders = funders_dedup -organization_funders = organization_funders.rename(columns = {'prerequisite_funders_name' : 'name', - 'prerequisite_funders_fundref' : 'fundref', - 'prerequisite_funders_ror' : 'ror', - 'prerequisite_funders_country' : 'iso_code', - 'prerequisite_funders_url' : 'website', - 'prerequisite_funders_sherpa_id' : 'sherpa_id' - }) -organization_funders - - -# In[155]: - - -# lien avec les pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country - - -# In[156]: - - -# merge avec les pays -organization_funders['iso_code'] = organization_funders['iso_code'].str.upper() -organization_funders['is_funder'] = 1 -organization_funders = pd.merge(organization_funders, country[['iso_code', 'id']], how='left', on='iso_code') -organization_funders - - -# In[157]: - - -organization_funders = organization_funders.rename(columns = {'id' : 'country'}) -organization_funders - - -# In[158]: - - -# ajout des organizations suisses -organization = pd.read_csv('ror/ror_ch_hei_export.tsv', encoding='utf-8', header=0, sep='\t', dtype={'fundref': str, 'orgref': str}, na_filter=False) -organization - - -# In[159]: - - -# tri par nom -organization = organization.sort_values(by='name') -organization - - -# In[160]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[161]: - - -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 32 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization - - -# In[162]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[163]: - - -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 45 -# Move target row to first element of list. 
-idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization - - -# In[164]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[165]: - - -# ajout des funders -organization = organization.append(organization_funders, ignore_index=True) -organization - - -# In[166]: - - -# remplacement dans le fundref id qui renvoie vers du JSON seulement -# URL actuel : http://data.crossref.org/fundingdata/funder/10.13039/[fundref id] -# ex : http://dx.doi.org/10.13039/501100007903 -# redirigé sur : http://data.crossref.org/fundingdata/funder/10.13039/501100007903 -# URL des publications financées : https://search.crossref.org/funding?q=[fundref id]&from_ui=yes -# ex : https://search.crossref.org/funding?q=501100003006&from_ui=yes -organization['fundref'] = organization['fundref'].str.replace('http://dx.doi.org/10.13039/', '') -organization - - -# In[167]: - - -# df pour l'export -organization_export = organization[['name', 'website', 'country', 'starting_year', 'is_funder', 'ror', 'fundref']] -organization_export - - -# In[168]: - - -# ajout des valeurs vides -organization_export['starting_year'] = organization_export['starting_year'].fillna(0) -organization_export['fundref'] = organization_export['fundref'].fillna('') -organization_export['ror'] = organization_export['ror'].fillna('') -organization_export - - -# In[169]: - - -# ajout de l'id avec l'index + 1 -organization_export['id'] = organization_export.index + 1 -# del terms_export_dedup['index'] -organization_export - - -# In[170]: - - -# export de la table -result = organization_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[171]: - - -# export excel -organization_export.to_excel('sample/organization.xlsx', index=False) - - -# In[172]: - - -# export csv 
-organization_export.to_csv('sample/organization.tsv', index=False) - - -# ## Table condition_set_term - -# In[173]: - - -term_orig - - -# In[174]: - - -terms_export_dedup - - -# In[175]: - - -# merge des terms id -term_orig = pd.merge(term_orig, terms_export_dedup[['id_content_hash', 'id']], on='id_content_hash', how='left') -term_orig - - -# In[176]: - - -term_orig = term_orig.rename(columns = {'id' : 'term'}) -term_orig - - -# In[177]: - - -condition_type - - -# In[178]: - - -# merge des condition type -term_orig['condition_type'] = 3 -term_orig.loc[term_orig['ror'].isna(), 'condition_type'] = 1 -term_orig - - -# In[179]: - - -organization_export - - -# In[180]: - - -# merge des organizations -term_orig = pd.merge(term_orig, organization_export[['ror', 'id']], on='ror', how='left') -term_orig - - -# In[181]: - - -term_orig = term_orig.rename(columns = {'id' : 'organization'}) -term_orig - - -# In[182]: - - -# concat valeurs avec même id -condition_set_term_dedup_terms = term_orig[['term', 'id_content_hash']] -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms.drop_duplicates() -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.loc[condition_set_term_dedup_terms_dedup['term'].notna()] -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(int) -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(str) -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.groupby('id_content_hash').agg({'term': lambda x: ', '.join(x)}) -condition_set_term_dedup_terms_dedup - - -# In[183]: - - -# concat valeurs avec même id -condition_set_term_dedup_journals = term_orig[['journal', 'id_content_hash']] -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals.drop_duplicates() -condition_set_term_dedup_journals_dedup = 
condition_set_term_dedup_journals_dedup.loc[condition_set_term_dedup_journals_dedup['journal'].notna()] -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(int) -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(str) -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.groupby('id_content_hash').agg({'journal': lambda x: ', '.join(x)}) -condition_set_term_dedup_journals_dedup - - -# In[184]: - - -# concat valeurs avec même id -condition_set_term_dedup_organizations = term_orig[['organization', 'id_content_hash']] -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations.drop_duplicates() -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.loc[condition_set_term_dedup_organizations_dedup['organization'].notna()] -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(int) -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(str) -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.groupby('id_content_hash').agg({'organization': lambda x: ', '.join(x)}) -condition_set_term_dedup_organizations_dedup - - -# In[185]: - - -# concat valeurs avec même id : pas possible pour condition_type -condition_set_term_dedup_condition_types = term_orig[['condition_type', 'id_content_hash']] -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types.drop_duplicates() -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.loc[condition_set_term_dedup_condition_types_dedup['condition_type'].notna()] -# condition_set_term_dedup_condition_types_dedup['condition_type'] = 
condition_set_term_dedup_condition_types_dedup['condition_type'].astype(int) -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(str) -# condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.groupby('id_content_hash').agg({'condition_type': lambda x: ', '.join(x)}) -condition_set_term_dedup_condition_types_dedup - - -# In[186]: - - -# recuperation des ids groupés -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_terms_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_journals_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_organizations_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_condition_types_dedup, on='id_content_hash', how='left') -terms_export_dedup - - -# In[187]: - - -condition_sets_orig = terms_export_dedup[['term', 'condition_type', 'organization', 'journal']] -condition_sets_orig - - -# In[188]: - - -# ajout d'un hash unique pour chaque variante -condition_sets_orig['id_term_hash'] = condition_sets_orig.apply(lambda x: hash(tuple(x[['condition_type', 'organization', 'journal']])), axis = 1) -condition_sets_orig - - -# In[189]: - - -# grouper les termes qui ont les mêmes valeurs pour le reste -condition_sets_orig_terms = condition_sets_orig[['term', 'id_term_hash']] -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].notna()] -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(int) -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(str) -condition_sets_orig_terms_dedup = 
condition_sets_orig_terms_dedup.groupby('id_term_hash').agg({'term': lambda x: ', '.join(x)}) -condition_sets_orig_terms_dedup - - -# In[190]: - - -# ajout des ids groupées -condition_sets_orig_terms = pd.merge(condition_sets_orig, condition_sets_orig_terms_dedup, on='id_term_hash', how='left') -condition_sets_orig_terms - - -# In[191]: - - -# rename terms -del condition_sets_orig_terms['term_x'] -condition_sets_orig_terms = condition_sets_orig_terms.rename(columns = {'term_y' : 'term'}) -condition_sets_orig_terms - - -# In[192]: - - -# test duplicates -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].sort_values(by='term') - - -# In[193]: - - -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].shape[0] - - -# In[194]: - - -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup - - -# In[195]: - - -# ajout des champs manquants -condition_sets_orig_terms_dedup['comment'] = '' - - -# In[196]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['journal'].isna()] - - -# In[197]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].isna()] - - -# In[198]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['condition_type'].isna()] - - -# In[199]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['organization'].isna()] - - -# In[200]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup['organization'] = condition_sets_orig_terms_dedup['organization'].fillna('') -condition_sets_orig_terms_dedup - - -# In[201]: - - -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del 
condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup - - -# In[202]: - - -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup - - -# In[203]: - - -# export de la table -result = condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_set.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[204]: - - -# export excel -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_excel('sample/condition_set.xlsx', index=False) - - -# In[205]: - - -# export csv -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_csv('sample/condition_set.tsv', index=False) - - -# ## Table organization_condition_set - -# In[206]: - - -condition_sets_orig_terms_dedup - - -# In[207]: - - -condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')] - - -# In[208]: - - -# creation du DF -# col_names = ['id', -# 'organization', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# organization_condition = pd.DataFrame(columns = col_names) -organization_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')][['id', 'organization', 'term']] -organization_condition - - -# In[209]: - - -# extraction des terms ids -organization_condition_split = organization_condition.assign(term = 
organization_condition.term.str.split(',')).explode('term') -organization_condition_split - - -# In[210]: - - -organization_condition_split.loc[organization_condition_split['organization'].isna()] - - -# In[211]: - - -organization_condition_split.loc[organization_condition_split['term'].isna()] - - -# In[212]: - - -organization_condition_split['term'] = organization_condition_split['term'].astype(int) -organization_condition_split - - -# In[213]: - - -# ajout du ROR -terms_export_dates - - -# In[214]: - - -# merge pour obtenir les dates -organization_condition_split = pd.merge(organization_condition_split, terms_export_dates[['term', 'valid_from', 'valid_until']], on='term', how='left') -organization_condition_split - - -# In[215]: - - -# dédoublonage -organization_condition_split_dedup = organization_condition_split.drop_duplicates() -organization_condition_split_dedup - - -# In[216]: - - -organization_condition = pd.merge(organization_condition, organization_condition_split_dedup[['id', 'valid_from', 'valid_until']], on='id', how='left') -organization_condition - - -# In[217]: - - -organization_condition = organization_condition.rename(columns = {'id' : 'condition_set'}) -organization_condition['valid_from'] = organization_condition['valid_from'].fillna('') -organization_condition['valid_until'] = organization_condition['valid_until'].fillna('') -organization_condition - - -# In[218]: - - -# split final pour avoir une ligne par organization -organization_condition_fin = organization_condition.assign(organization = organization_condition.organization.str.split(',')).explode('organization') -organization_condition_fin - - -# In[219]: - - -# ajout de l'id avec l'index + 1 -organization_condition_fin = organization_condition_fin.reset_index() -organization_condition_fin['id'] = organization_condition_fin.index + 1 -del organization_condition_fin['index'] -organization_condition_fin - - -# In[220]: - - -# export de la table -result = organization_condition_fin[['id', 
'condition_set', 'organization', 'valid_from', 'valid_until']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[221]: - - -# export excel -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_excel('sample/organization_condition.xlsx', index=False) - - -# In[222]: - - -# export csv -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_csv('sample/organization_condition.tsv', index=False) - - -# ## Table journal_condition_set - -# In[223]: - - -# creation du DF -# col_names = ['id', -# 'journal', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# journal_condition = pd.DataFrame(columns = col_names) -journal_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['journal'].notna()) & (condition_sets_orig_terms_dedup['journal'] != '')][['id', 'journal']] -journal_condition - - -# In[224]: - - -journal_condition = journal_condition.rename(columns = {'id' : 'condition_set'}) -journal_condition['valid_from'] = '' -journal_condition['valid_until'] = '' -journal_condition - - -# In[225]: - - -# split final pour avoir une ligne par journal -journal_condition_fin = journal_condition.assign(journal = journal_condition.journal.str.split(',')).explode('journal') -journal_condition_fin - - -# In[226]: - - -# ajout de l'id avec l'index + 1 -journal_condition_fin = journal_condition_fin.reset_index() -journal_condition_fin['id'] = journal_condition_fin.index + 1 -del journal_condition_fin['index'] -journal_condition_fin - - -# In[227]: - - -# export de la table -result = journal_condition_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, 
ensure_ascii=False) - - -# In[228]: - - -# export excel -journal_condition_fin.to_excel('sample/journal_condition.xlsx', index=False) - - -# In[229]: - - -# export csv -journal_condition_fin.to_csv('sample/journal_condition.tsv', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/99_oacct_import.md b/import_scripts/99_oacct_import.md deleted file mode 100644 index 75df7e4f..00000000 --- a/import_scripts/99_oacct_import.md +++ /dev/null @@ -1,212 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'importer les données en utilisant l'API : - -https://oacct-test.epfl.ch/api/ - -Exemple avec Journals : - -https://oacct-test.epfl.ch/api/journal/ - -GET /api/journal/ - -HTTP 200 OK -Allow: GET, POST, HEAD, OPTIONS -Content-Type: application/json -Vary: Accept - -[] - -Media type: application/json - -Content: -``` json -{ - "issn": [], - "name": "", - "name_short_iso_4": "", - "website": "", - "oa_options": "", - "starting_year": null, - "end_year": null, - "doaj_seal": false, - "doaj_status": false, - "lockss": false, - "nlch": false, - "portico": false, - "qoam_av_score": null -} -``` - - - -```python -import json -import requests -import codecs -oacct_login = 'oacct_test' -oacct_pwd = '2f4dBRhyj7' -headers = {'accept': 'application/json'} -``` - - -```python -# test sans authentifications -url = 'https://oacct-test.epfl.ch/api/country/' -r = requests.get(url) -print(r) -``` - - - - - -```python -print(r.text) -``` - - [{"id":1,"name":"Afghanistan","iso_code":"AF"},{"id":249,"name":"Åland Islands","iso_code":"AX"},{"id":2,"name":"Albania","iso_code":"AL"},{"id":3,"name":"Algeria","iso_code":"DZ"},{"id":4,"name":"American 
Samoa","iso_code":"AS"},{"id":5,"name":"Andorra","iso_code":"AD"},{"id":6,"name":"Angola","iso_code":"AO"},{"id":7,"name":"Anguilla","iso_code":"AI"},{"id":8,"name":"Antarctica","iso_code":"AQ"},{"id":9,"name":"Antigua and Barbuda","iso_code":"AG"},{"id":10,"name":"Argentina","iso_code":"AR"},{"id":11,"name":"Armenia","iso_code":"AM"},{"id":12,"name":"Aruba","iso_code":"AW"},{"id":13,"name":"Australia","iso_code":"AU"},{"id":14,"name":"Austria","iso_code":"AT"},{"id":15,"name":"Azerbaijan","iso_code":"AZ"},{"id":16,"name":"Bahamas (the)","iso_code":"BS"},{"id":17,"name":"Bahrain","iso_code":"BH"},{"id":18,"name":"Bangladesh","iso_code":"BD"},{"id":19,"name":"Barbados","iso_code":"BB"},{"id":20,"name":"Belarus","iso_code":"BY"},{"id":21,"name":"Belgium","iso_code":"BE"},{"id":22,"name":"Belize","iso_code":"BZ"},{"id":23,"name":"Benin","iso_code":"BJ"},{"id":24,"name":"Bermuda","iso_code":"BM"},{"id":25,"name":"Bhutan","iso_code":"BT"},{"id":26,"name":"Bolivia (Plurinational State of)","iso_code":"BO"},{"id":27,"name":"Bonaire, Sint Eustatius and Saba","iso_code":"BQ"},{"id":28,"name":"Bosnia and Herzegovina","iso_code":"BA"},{"id":29,"name":"Botswana","iso_code":"BW"},{"id":30,"name":"Bouvet Island","iso_code":"BV"},{"id":31,"name":"Brazil","iso_code":"BR"},{"id":32,"name":"British Indian Ocean Territory (the)","iso_code":"IO"},{"id":33,"name":"Brunei Darussalam","iso_code":"BN"},{"id":34,"name":"Bulgaria","iso_code":"BG"},{"id":35,"name":"Burkina Faso","iso_code":"BF"},{"id":36,"name":"Burundi","iso_code":"BI"},{"id":37,"name":"Cabo Verde","iso_code":"CV"},{"id":38,"name":"Cambodia","iso_code":"KH"},{"id":39,"name":"Cameroon","iso_code":"CM"},{"id":40,"name":"Canada","iso_code":"CA"},{"id":41,"name":"Cayman Islands (the)","iso_code":"KY"},{"id":42,"name":"Central African Republic (the)","iso_code":"CF"},{"id":43,"name":"Chad","iso_code":"TD"},{"id":44,"name":"Chile","iso_code":"CL"},{"id":45,"name":"China","iso_code":"CN"},{"id":46,"name":"Christmas 
Island","iso_code":"CX"},{"id":47,"name":"Cocos (Keeling) Islands (the)","iso_code":"CC"},{"id":48,"name":"Colombia","iso_code":"CO"},{"id":49,"name":"Comoros (the)","iso_code":"KM"},{"id":50,"name":"Congo (the Democratic Republic of the)","iso_code":"CD"},{"id":51,"name":"Congo (the)","iso_code":"CG"},{"id":52,"name":"Cook Islands (the)","iso_code":"CK"},{"id":53,"name":"Costa Rica","iso_code":"CR"},{"id":59,"name":"Côte d'Ivoire","iso_code":"CI"},{"id":54,"name":"Croatia","iso_code":"HR"},{"id":55,"name":"Cuba","iso_code":"CU"},{"id":56,"name":"Curaçao","iso_code":"CW"},{"id":57,"name":"Cyprus","iso_code":"CY"},{"id":58,"name":"Czechia","iso_code":"CZ"},{"id":60,"name":"Denmark","iso_code":"DK"},{"id":61,"name":"Djibouti","iso_code":"DJ"},{"id":62,"name":"Dominica","iso_code":"DM"},{"id":63,"name":"Dominican Republic (the)","iso_code":"DO"},{"id":64,"name":"Ecuador","iso_code":"EC"},{"id":65,"name":"Egypt","iso_code":"EG"},{"id":66,"name":"El Salvador","iso_code":"SV"},{"id":67,"name":"Equatorial Guinea","iso_code":"GQ"},{"id":68,"name":"Eritrea","iso_code":"ER"},{"id":69,"name":"Estonia","iso_code":"EE"},{"id":70,"name":"Eswatini","iso_code":"SZ"},{"id":71,"name":"Ethiopia","iso_code":"ET"},{"id":72,"name":"Falkland Islands (the) [Malvinas]","iso_code":"FK"},{"id":73,"name":"Faroe Islands (the)","iso_code":"FO"},{"id":74,"name":"Fiji","iso_code":"FJ"},{"id":75,"name":"Finland","iso_code":"FI"},{"id":76,"name":"France","iso_code":"FR"},{"id":77,"name":"French Guiana","iso_code":"GF"},{"id":78,"name":"French Polynesia","iso_code":"PF"},{"id":79,"name":"French Southern Territories (the)","iso_code":"TF"},{"id":80,"name":"Gabon","iso_code":"GA"},{"id":81,"name":"Gambia 
(the)","iso_code":"GM"},{"id":82,"name":"Georgia","iso_code":"GE"},{"id":83,"name":"Germany","iso_code":"DE"},{"id":84,"name":"Ghana","iso_code":"GH"},{"id":85,"name":"Gibraltar","iso_code":"GI"},{"id":86,"name":"Greece","iso_code":"GR"},{"id":87,"name":"Greenland","iso_code":"GL"},{"id":88,"name":"Grenada","iso_code":"GD"},{"id":89,"name":"Guadeloupe","iso_code":"GP"},{"id":90,"name":"Guam","iso_code":"GU"},{"id":91,"name":"Guatemala","iso_code":"GT"},{"id":92,"name":"Guernsey","iso_code":"GG"},{"id":93,"name":"Guinea","iso_code":"GN"},{"id":94,"name":"Guinea-Bissau","iso_code":"GW"},{"id":95,"name":"Guyana","iso_code":"GY"},{"id":96,"name":"Haiti","iso_code":"HT"},{"id":97,"name":"Heard Island and McDonald Islands","iso_code":"HM"},{"id":98,"name":"Holy See (the)","iso_code":"VA"},{"id":99,"name":"Honduras","iso_code":"HN"},{"id":100,"name":"Hong Kong","iso_code":"HK"},{"id":101,"name":"Hungary","iso_code":"HU"},{"id":102,"name":"Iceland","iso_code":"IS"},{"id":103,"name":"India","iso_code":"IN"},{"id":104,"name":"Indonesia","iso_code":"ID"},{"id":250,"name":"International Agency","iso_code":"OI"},{"id":105,"name":"Iran (Islamic Republic of)","iso_code":"IR"},{"id":106,"name":"Iraq","iso_code":"IQ"},{"id":107,"name":"Ireland","iso_code":"IE"},{"id":108,"name":"Isle of Man","iso_code":"IM"},{"id":109,"name":"Israel","iso_code":"IL"},{"id":110,"name":"Italy","iso_code":"IT"},{"id":111,"name":"Jamaica","iso_code":"JM"},{"id":112,"name":"Japan","iso_code":"JP"},{"id":113,"name":"Jersey","iso_code":"JE"},{"id":114,"name":"Jordan","iso_code":"JO"},{"id":115,"name":"Kazakhstan","iso_code":"KZ"},{"id":116,"name":"Kenya","iso_code":"KE"},{"id":117,"name":"Kiribati","iso_code":"KI"},{"id":118,"name":"Korea (the Democratic People's Republic of)","iso_code":"KP"},{"id":119,"name":"Korea (the Republic of)","iso_code":"KR"},{"id":120,"name":"Kuwait","iso_code":"KW"},{"id":121,"name":"Kyrgyzstan","iso_code":"KG"},{"id":122,"name":"Lao People's Democratic Republic 
(the)","iso_code":"LA"},{"id":123,"name":"Latvia","iso_code":"LV"},{"id":124,"name":"Lebanon","iso_code":"LB"},{"id":125,"name":"Lesotho","iso_code":"LS"},{"id":126,"name":"Liberia","iso_code":"LR"},{"id":127,"name":"Libya","iso_code":"LY"},{"id":128,"name":"Liechtenstein","iso_code":"LI"},{"id":129,"name":"Lithuania","iso_code":"LT"},{"id":130,"name":"Luxembourg","iso_code":"LU"},{"id":131,"name":"Macao","iso_code":"MO"},{"id":132,"name":"Madagascar","iso_code":"MG"},{"id":133,"name":"Malawi","iso_code":"MW"},{"id":134,"name":"Malaysia","iso_code":"MY"},{"id":135,"name":"Maldives","iso_code":"MV"},{"id":136,"name":"Mali","iso_code":"ML"},{"id":137,"name":"Malta","iso_code":"MT"},{"id":138,"name":"Marshall Islands (the)","iso_code":"MH"},{"id":139,"name":"Martinique","iso_code":"MQ"},{"id":140,"name":"Mauritania","iso_code":"MR"},{"id":141,"name":"Mauritius","iso_code":"MU"},{"id":142,"name":"Mayotte","iso_code":"YT"},{"id":143,"name":"Mexico","iso_code":"MX"},{"id":144,"name":"Micronesia (Federated States of)","iso_code":"FM"},{"id":145,"name":"Moldova (the Republic of)","iso_code":"MD"},{"id":146,"name":"Monaco","iso_code":"MC"},{"id":147,"name":"Mongolia","iso_code":"MN"},{"id":148,"name":"Montenegro","iso_code":"ME"},{"id":149,"name":"Montserrat","iso_code":"MS"},{"id":150,"name":"Morocco","iso_code":"MA"},{"id":151,"name":"Mozambique","iso_code":"MZ"},{"id":152,"name":"Myanmar","iso_code":"MM"},{"id":153,"name":"Namibia","iso_code":"NA"},{"id":154,"name":"Nauru","iso_code":"NR"},{"id":155,"name":"Nepal","iso_code":"NP"},{"id":156,"name":"Netherlands (the)","iso_code":"NL"},{"id":157,"name":"New Caledonia","iso_code":"NC"},{"id":158,"name":"New Zealand","iso_code":"NZ"},{"id":159,"name":"Nicaragua","iso_code":"NI"},{"id":160,"name":"Niger (the)","iso_code":"NE"},{"id":161,"name":"Nigeria","iso_code":"NG"},{"id":162,"name":"Niue","iso_code":"NU"},{"id":163,"name":"Norfolk Island","iso_code":"NF"},{"id":164,"name":"North 
Macedonia","iso_code":"MK"},{"id":165,"name":"Northern Mariana Islands (the)","iso_code":"MP"},{"id":166,"name":"Norway","iso_code":"NO"},{"id":167,"name":"Oman","iso_code":"OM"},{"id":168,"name":"Pakistan","iso_code":"PK"},{"id":169,"name":"Palau","iso_code":"PW"},{"id":170,"name":"Palestine, State of","iso_code":"PS"},{"id":171,"name":"Panama","iso_code":"PA"},{"id":172,"name":"Papua New Guinea","iso_code":"PG"},{"id":173,"name":"Paraguay","iso_code":"PY"},{"id":174,"name":"Peru","iso_code":"PE"},{"id":175,"name":"Philippines (the)","iso_code":"PH"},{"id":176,"name":"Pitcairn","iso_code":"PN"},{"id":177,"name":"Poland","iso_code":"PL"},{"id":178,"name":"Portugal","iso_code":"PT"},{"id":179,"name":"Puerto Rico","iso_code":"PR"},{"id":180,"name":"Qatar","iso_code":"QA"},{"id":184,"name":"Réunion","iso_code":"RE"},{"id":181,"name":"Romania","iso_code":"RO"},{"id":182,"name":"Russian Federation (the)","iso_code":"RU"},{"id":183,"name":"Rwanda","iso_code":"RW"},{"id":185,"name":"Saint Barthélemy","iso_code":"BL"},{"id":186,"name":"Saint Helena, Ascension and Tristan da Cunha","iso_code":"SH"},{"id":187,"name":"Saint Kitts and Nevis","iso_code":"KN"},{"id":188,"name":"Saint Lucia","iso_code":"LC"},{"id":189,"name":"Saint Martin (French part)","iso_code":"MF"},{"id":190,"name":"Saint Pierre and Miquelon","iso_code":"PM"},{"id":191,"name":"Saint Vincent and the Grenadines","iso_code":"VC"},{"id":192,"name":"Samoa","iso_code":"WS"},{"id":193,"name":"San Marino","iso_code":"SM"},{"id":194,"name":"Sao Tome and Principe","iso_code":"ST"},{"id":195,"name":"Saudi Arabia","iso_code":"SA"},{"id":196,"name":"Senegal","iso_code":"SN"},{"id":197,"name":"Serbia","iso_code":"RS"},{"id":198,"name":"Seychelles","iso_code":"SC"},{"id":199,"name":"Sierra Leone","iso_code":"SL"},{"id":1000000,"name":"Sildavie2","iso_code":"II"},{"id":200,"name":"Singapore","iso_code":"SG"},{"id":201,"name":"Sint Maarten (Dutch 
part)","iso_code":"SX"},{"id":202,"name":"Slovakia","iso_code":"SK"},{"id":203,"name":"Slovenia","iso_code":"SI"},{"id":204,"name":"Solomon Islands","iso_code":"SB"},{"id":205,"name":"Somalia","iso_code":"SO"},{"id":206,"name":"South Africa","iso_code":"ZA"},{"id":207,"name":"South Georgia and the South Sandwich Islands","iso_code":"GS"},{"id":208,"name":"South Sudan","iso_code":"SS"},{"id":209,"name":"Spain","iso_code":"ES"},{"id":210,"name":"Sri Lanka","iso_code":"LK"},{"id":211,"name":"Sudan (the)","iso_code":"SD"},{"id":212,"name":"Suriname","iso_code":"SR"},{"id":213,"name":"Svalbard and Jan Mayen","iso_code":"SJ"},{"id":214,"name":"Sweden","iso_code":"SE"},{"id":215,"name":"Switzerland","iso_code":"CH"},{"id":216,"name":"Syrian Arab Republic (the)","iso_code":"SY"},{"id":217,"name":"Taiwan (Province of China)","iso_code":"TW"},{"id":218,"name":"Tajikistan","iso_code":"TJ"},{"id":219,"name":"Tanzania, the United Republic of","iso_code":"TZ"},{"id":220,"name":"Thailand","iso_code":"TH"},{"id":221,"name":"Timor-Leste","iso_code":"TL"},{"id":222,"name":"Togo","iso_code":"TG"},{"id":223,"name":"Tokelau","iso_code":"TK"},{"id":224,"name":"Tonga","iso_code":"TO"},{"id":225,"name":"Trinidad and Tobago","iso_code":"TT"},{"id":226,"name":"Tunisia","iso_code":"TN"},{"id":227,"name":"Turkey","iso_code":"TR"},{"id":228,"name":"Turkmenistan","iso_code":"TM"},{"id":229,"name":"Turks and Caicos Islands (the)","iso_code":"TC"},{"id":230,"name":"Tuvalu","iso_code":"TV"},{"id":231,"name":"Uganda","iso_code":"UG"},{"id":232,"name":"Ukraine","iso_code":"UA"},{"id":233,"name":"United Arab Emirates (the)","iso_code":"AE"},{"id":234,"name":"United Kingdom of Great Britain and Northern Ireland (the)","iso_code":"GB"},{"id":235,"name":"United States Minor Outlying Islands (the)","iso_code":"UM"},{"id":236,"name":"United States of America 
(the)","iso_code":"US"},{"id":999999,"name":"UNKNOWN","iso_code":"__"},{"id":237,"name":"Uruguay","iso_code":"UY"},{"id":238,"name":"Uzbekistan","iso_code":"UZ"},{"id":239,"name":"Vanuatu","iso_code":"VU"},{"id":240,"name":"Venezuela (Bolivarian Republic of)","iso_code":"VE"},{"id":241,"name":"Viet Nam","iso_code":"VN"},{"id":242,"name":"Virgin Islands (British)","iso_code":"VG"},{"id":243,"name":"Virgin Islands (U.S.)","iso_code":"VI"},{"id":244,"name":"Wallis and Futuna","iso_code":"WF"},{"id":245,"name":"Western Sahara*","iso_code":"EH"},{"id":246,"name":"Yemen","iso_code":"YE"},{"id":247,"name":"Zambia","iso_code":"ZM"},{"id":248,"name":"Zimbabwe","iso_code":"ZW"}] - - - -```python -# test avec authentification -url = 'https://oacct-test.epfl.ch/api/country/3' -r2 = requests.get(url, auth=(oacct_login, oacct_pwd)) -print(r2) -``` - - - - - -```python -print(r2.text) -``` - - {"id":3,"name":"Algeria","iso_code":"DZ"} - - - -```python -journal = { - "id": 1, - "name": "Revue médicale suisse", - "name_short_iso_4": "Rev. méd. 
suisse", - "starting_year": "2005", - "end_year": "9999", - "website": "", - "country": 215.0, - "language": "138", - "publisher": "1", - "doaj_seal": 0, - "doaj_status": 0, - "lockss": 0, - "portico": 0, - "nlch": 0, - "qoam_av_score": "", - "oa_status": 1, - "issn": "1234-5678" - } -``` - - -```python -# test avec post -url = 'https://oacct-test.epfl.ch/api/journal/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=journal) -print(r2) -``` - - - - - -```python -print(r2.text) -``` - - {"issn":["This field is required."]} - - - -```python -country = { - "name": "Sildavie", - "iso_code": "II", - "id": 333 - } -``` - - -```python -# test avec post -url = 'https://oacct-test.epfl.ch/api/country/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country) -print(r2) -``` - - - - - -```python -print(r2.json()) -``` - - {'id': 1000001, 'name': 'Sildavie', 'iso_code': 'II'} - - - -```python -country2 = { - "id": 1000000, - "name": "Sildavie3", - "iso_code": "II" -} -``` - - -```python -# test avec put -url = 'https://oacct-test.epfl.ch/api/country/1000000' -r2 = requests.put(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country2) -print(r2) -``` - - - - - -```python -print(r2.json()) -``` - - {'id': 1000000, 'name': 'Sildavie2', 'iso_code': 'II'} - - - -```python -# convert to json -json_response = r2.json() -print(json_response) -``` - - {'id': 1000000, 'name': 'Sildavie2', 'iso_code': 'II'} - - - -```python -# get the name -name = json_response['name'] -name -``` - - - - - 'Sildavie2' - - diff --git a/import_scripts/99_oacct_import.py b/import_scripts/99_oacct_import.py deleted file mode 100644 index 8e2588c1..00000000 --- a/import_scripts/99_oacct_import.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, 
Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'importer les données en utilisant l'API : -# -# https://oacct-test.epfl.ch/api/ -# -# Exemple avec Journals : -# -# https://oacct-test.epfl.ch/api/journal/ -# -# GET /api/journal/ -# -# HTTP 200 OK -# Allow: GET, POST, HEAD, OPTIONS -# Content-Type: application/json -# Vary: Accept -# -# [] -# -# Media type: application/json -# -# Content: -# ``` json -# { -# "issn": [], -# "name": "", -# "name_short_iso_4": "", -# "website": "", -# "oa_options": "", -# "starting_year": null, -# "end_year": null, -# "doaj_seal": false, -# "doaj_status": false, -# "lockss": false, -# "nlch": false, -# "portico": false, -# "qoam_av_score": null -# } -# ``` -# - -# In[1]: - - -import json -import requests -import codecs -oacct_login = 'oacct_test' -oacct_pwd = '2f4dBRhyj7' -headers = {'accept': 'application/json'} - - -# In[2]: - - -# test sans authentifications -url = 'https://oacct-test.epfl.ch/api/country/' -r = requests.get(url) -print(r) - - -# In[3]: - - -print(r.text) - - -# In[6]: - - -# test avec authentification -url = 'https://oacct-test.epfl.ch/api/country/3' -r2 = requests.get(url, auth=(oacct_login, oacct_pwd)) -print(r2) - - -# In[7]: - - -print(r2.text) - - -# In[9]: - - -journal = { - "id": 1, - "name": "Revue médicale suisse", - "name_short_iso_4": "Rev. méd. 
suisse", - "starting_year": "2005", - "end_year": "9999", - "website": "", - "country": 215.0, - "language": "138", - "publisher": "1", - "doaj_seal": 0, - "doaj_status": 0, - "lockss": 0, - "portico": 0, - "nlch": 0, - "qoam_av_score": "", - "oa_status": 1, - "issn": "1234-5678" - } - - -# In[11]: - - -# test avec post -url = 'https://oacct-test.epfl.ch/api/journal/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=journal) -print(r2) - - -# In[12]: - - -print(r2.text) - - -# In[13]: - - -country = { - "name": "Sildavie", - "iso_code": "II", - "id": 333 - } - - -# In[14]: - - -# test avec post -url = 'https://oacct-test.epfl.ch/api/country/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country) -print(r2) - - -# In[15]: - - -print(r2.json()) - - -# In[16]: - - -country2 = { - "id": 1000000, - "name": "Sildavie3", - "iso_code": "II" -} - - -# In[17]: - - -# test avec put -url = 'https://oacct-test.epfl.ch/api/country/1000000' -r2 = requests.put(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country2) -print(r2) - - -# In[18]: - - -print(r2.json()) - - -# In[19]: - - -# convert to json -json_response = r2.json() -print(json_response) - - -# In[20]: - - -# get the name -name = json_response['name'] -name - diff --git a/import_scripts/README.md b/import_scripts/README.md deleted file mode 100644 index 041865ab..00000000 --- a/import_scripts/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Original IPython notebooks converted to pure Python scripts and Markdown documents: - -``` -ipython nbconvert --to script *.ipynb -ipython nbconvert --to markdown *.ipynb -``` -=> easier Git version control - -Snapshot on 2021-09-23 AB