Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86754289
02_oacct_languages.md
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Oct 8, 10:23
Size
12 KB
Mime Type
text/html
Expires
Thu, Oct 10, 10:23 (2 d)
Engine
blob
Format
Raw Data
Handle
21478449
Attached To
rOACCT Open Access Compliance Check Tool (OACCT)
02_oacct_languages.md
View Options
#
Projet
Open
Access
Compliance
Check
Tool
(
OACCT
)
Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1
Ce notebook permet d'extraire les données choisies parmi les sources obtenues par API et de les traiter pour les rendre exploitables dans l'application OACCT.
Auteur
:
**
Pablo
Iriarte
**,
Universit
é
de
Gen
è
ve
(
pablo
.
iriarte
@
unige
.
ch
)
Date
de
derni
è
re
mise
à
jour
:
16.07.2021
```
python
import
pandas
as
pd
import
csv
import
json
import
numpy
as
np
```
##
Table
Language
```
python
# Source list: https://www.loc.gov/standards/iso639-2/php/code_list.php
# Column headings noted for that list:
#   ISO 639-2 Code | ISO 639-1 Code | English name of Language |
#   French name of Language | German name of Language
#
# NOTE(review): in the rendered output of this cell the 'ISO 639-1 Code'
# column is empty for 'aar' while 'ignore' holds 'aa', which suggests the
# two labels are swapped relative to the data (the second field is
# presumably the alternative 3-letter code) -- confirm. Both columns are
# dropped in a later cell, so the final table is not affected.
language = pd.read_csv(
    'ISO-639-2_utf-8.txt',
    sep='|',
    header=None,          # the file has no header row; names are given below
    names=[
        'ISO 639-2 Code',
        'ISO 639-1 Code',
        'ignore',
        'English name of Language',
        'French name of Language',
    ],
    index_col=False,
    na_filter=False,      # keep empty fields as '' instead of NaN
    encoding='utf-8',
)
language
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
ISO
639
-
2
Code
</
th
>
<
th
>
ISO
639
-
1
Code
</
th
>
<
th
>
ignore
</
th
>
<
th
>
English
name
of
Language
</
th
>
<
th
>
French
name
of
Language
</
th
>
</
tr
>
</
thead
>
<
tbody
>
<
tr
>
<
td
>
0
</
td
>
<
td
>
aar
</
td
>
<
td
></
td
>
<
td
>
aa
</
td
>
<
td
>
Afar
</
td
>
<
td
>
afar
</
td
>
</
tr
>
<
tr
>
<
td
>
1
</
td
>
<
td
>
abk
</
td
>
<
td
></
td
>
<
td
>
ab
</
td
>
<
td
>
Abkhazian
</
td
>
<
td
>
abkhaze
</
td
>
</
tr
>
<
tr
>
<
td
>
2
</
td
>
<
td
>
ace
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Achinese
</
td
>
<
td
>
aceh
</
td
>
</
tr
>
<
tr
>
<
td
>
3
</
td
>
<
td
>
ach
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Acoli
</
td
>
<
td
>
acoli
</
td
>
</
tr
>
<
tr
>
<
td
>
4
</
td
>
<
td
>
ada
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Adangme
</
td
>
<
td
>
adangme
</
td
>
</
tr
>
<
tr
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
</
tr
>
<
tr
>
<
td
>
482
</
td
>
<
td
>
znd
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zande
languages
</
td
>
<
td
>
zand
é
,
langues
</
td
>
</
tr
>
<
tr
>
<
td
>
483
</
td
>
<
td
>
zul
</
td
>
<
td
></
td
>
<
td
>
zu
</
td
>
<
td
>
Zulu
</
td
>
<
td
>
zoulou
</
td
>
</
tr
>
<
tr
>
<
td
>
484
</
td
>
<
td
>
zun
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zuni
</
td
>
<
td
>
zuni
</
td
>
</
tr
>
<
tr
>
<
td
>
485
</
td
>
<
td
>
zxx
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
No
linguistic
content
;
Not
applicable
</
td
>
<
td
>
pas
de
contenu
linguistique
;
non
applicable
</
td
>
</
tr
>
<
tr
>
<
td
>
486
</
td
>
<
td
>
zza
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zaza
;
Dimili
;
Dimli
;
Kirdki
;
Kirmanjki
;
Zazaki
</
td
>
<
td
>
zaza
;
dimili
;
dimli
;
kirdki
;
kirmanjki
;
zazaki
</
td
>
</
tr
>
</
tbody
>
</
table
>
<
p
>
487
rows
×
5
columns
</
p
>
</
div
>
```
python
# Sanity check: list the rows that have no ISO 639-2 code.
# The table is loaded with na_filter=False, so an empty field is stored as
# the empty string '' rather than NaN; isnull() alone would therefore never
# match anything. Test for both so the check is meaningful either way.
iso_codes = language['ISO 639-2 Code']
language.loc[iso_codes.isnull() | iso_codes.eq('')]
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
ISO
639
-
2
Code
</
th
>
<
th
>
ISO
639
-
1
Code
</
th
>
<
th
>
ignore
</
th
>
<
th
>
English
name
of
Language
</
th
>
<
th
>
French
name
of
Language
</
th
>
</
tr
>
</
thead
>
<
tbody
>
</
tbody
>
</
table
>
</
div
>
```
python
# Turn the row index into a regular 'index' column (used to build the id).
language = language.reset_index(drop=False)
language
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
index
</
th
>
<
th
>
ISO
639
-
2
Code
</
th
>
<
th
>
ISO
639
-
1
Code
</
th
>
<
th
>
ignore
</
th
>
<
th
>
English
name
of
Language
</
th
>
<
th
>
French
name
of
Language
</
th
>
</
tr
>
</
thead
>
<
tbody
>
<
tr
>
<
td
>
0
</
td
>
<
td
>
0
</
td
>
<
td
>
aar
</
td
>
<
td
></
td
>
<
td
>
aa
</
td
>
<
td
>
Afar
</
td
>
<
td
>
afar
</
td
>
</
tr
>
<
tr
>
<
td
>
1
</
td
>
<
td
>
1
</
td
>
<
td
>
abk
</
td
>
<
td
></
td
>
<
td
>
ab
</
td
>
<
td
>
Abkhazian
</
td
>
<
td
>
abkhaze
</
td
>
</
tr
>
<
tr
>
<
td
>
2
</
td
>
<
td
>
2
</
td
>
<
td
>
ace
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Achinese
</
td
>
<
td
>
aceh
</
td
>
</
tr
>
<
tr
>
<
td
>
3
</
td
>
<
td
>
3
</
td
>
<
td
>
ach
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Acoli
</
td
>
<
td
>
acoli
</
td
>
</
tr
>
<
tr
>
<
td
>
4
</
td
>
<
td
>
4
</
td
>
<
td
>
ada
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Adangme
</
td
>
<
td
>
adangme
</
td
>
</
tr
>
<
tr
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
</
tr
>
<
tr
>
<
td
>
482
</
td
>
<
td
>
482
</
td
>
<
td
>
znd
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zande
languages
</
td
>
<
td
>
zand
é
,
langues
</
td
>
</
tr
>
<
tr
>
<
td
>
483
</
td
>
<
td
>
483
</
td
>
<
td
>
zul
</
td
>
<
td
></
td
>
<
td
>
zu
</
td
>
<
td
>
Zulu
</
td
>
<
td
>
zoulou
</
td
>
</
tr
>
<
tr
>
<
td
>
484
</
td
>
<
td
>
484
</
td
>
<
td
>
zun
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zuni
</
td
>
<
td
>
zuni
</
td
>
</
tr
>
<
tr
>
<
td
>
485
</
td
>
<
td
>
485
</
td
>
<
td
>
zxx
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
No
linguistic
content
;
Not
applicable
</
td
>
<
td
>
pas
de
contenu
linguistique
;
non
applicable
</
td
>
</
tr
>
<
tr
>
<
td
>
486
</
td
>
<
td
>
486
</
td
>
<
td
>
zza
</
td
>
<
td
></
td
>
<
td
></
td
>
<
td
>
Zaza
;
Dimili
;
Dimli
;
Kirdki
;
Kirmanjki
;
Zazaki
</
td
>
<
td
>
zaza
;
dimili
;
dimli
;
kirdki
;
kirmanjki
;
zazaki
</
td
>
</
tr
>
</
tbody
>
</
table
>
<
p
>
487
rows
×
6
columns
</
p
>
</
div
>
```
python
# Build a 1-based id from the former 0-based row index.
language['id'] = language['index'] + 1

# Drop, in place, the helper column and the columns that are not exported.
language.drop(
    columns=[
        'index',
        'ignore',
        'French name of Language',
        'ISO 639-1 Code',
    ],
    inplace=True,
)
language
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
ISO
639
-
2
Code
</
th
>
<
th
>
English
name
of
Language
</
th
>
<
th
>
id
</
th
>
</
tr
>
</
thead
>
<
tbody
>
<
tr
>
<
td
>
0
</
td
>
<
td
>
aar
</
td
>
<
td
>
Afar
</
td
>
<
td
>
1
</
td
>
</
tr
>
<
tr
>
<
td
>
1
</
td
>
<
td
>
abk
</
td
>
<
td
>
Abkhazian
</
td
>
<
td
>
2
</
td
>
</
tr
>
<
tr
>
<
td
>
2
</
td
>
<
td
>
ace
</
td
>
<
td
>
Achinese
</
td
>
<
td
>
3
</
td
>
</
tr
>
<
tr
>
<
td
>
3
</
td
>
<
td
>
ach
</
td
>
<
td
>
Acoli
</
td
>
<
td
>
4
</
td
>
</
tr
>
<
tr
>
<
td
>
4
</
td
>
<
td
>
ada
</
td
>
<
td
>
Adangme
</
td
>
<
td
>
5
</
td
>
</
tr
>
<
tr
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
</
tr
>
<
tr
>
<
td
>
482
</
td
>
<
td
>
znd
</
td
>
<
td
>
Zande
languages
</
td
>
<
td
>
483
</
td
>
</
tr
>
<
tr
>
<
td
>
483
</
td
>
<
td
>
zul
</
td
>
<
td
>
Zulu
</
td
>
<
td
>
484
</
td
>
</
tr
>
<
tr
>
<
td
>
484
</
td
>
<
td
>
zun
</
td
>
<
td
>
Zuni
</
td
>
<
td
>
485
</
td
>
</
tr
>
<
tr
>
<
td
>
485
</
td
>
<
td
>
zxx
</
td
>
<
td
>
No
linguistic
content
;
Not
applicable
</
td
>
<
td
>
486
</
td
>
</
tr
>
<
tr
>
<
td
>
486
</
td
>
<
td
>
zza
</
td
>
<
td
>
Zaza
;
Dimili
;
Dimli
;
Kirdki
;
Kirmanjki
;
Zazaki
</
td
>
<
td
>
487
</
td
>
</
tr
>
</
tbody
>
</
table
>
<
p
>
487
rows
×
3
columns
</
p
>
</
div
>
```
python
# Rename the remaining columns to their short export names.
export_column_names = {
    'ISO 639-2 Code': 'iso_code',
    'English name of Language': 'name',
}
language = language.rename(columns=export_column_names)
```
```
python
# Display the current state of the table.
language
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
iso_code
</
th
>
<
th
>
name
</
th
>
<
th
>
id
</
th
>
</
tr
>
</
thead
>
<
tbody
>
<
tr
>
<
td
>
0
</
td
>
<
td
>
aar
</
td
>
<
td
>
Afar
</
td
>
<
td
>
1
</
td
>
</
tr
>
<
tr
>
<
td
>
1
</
td
>
<
td
>
abk
</
td
>
<
td
>
Abkhazian
</
td
>
<
td
>
2
</
td
>
</
tr
>
<
tr
>
<
td
>
2
</
td
>
<
td
>
ace
</
td
>
<
td
>
Achinese
</
td
>
<
td
>
3
</
td
>
</
tr
>
<
tr
>
<
td
>
3
</
td
>
<
td
>
ach
</
td
>
<
td
>
Acoli
</
td
>
<
td
>
4
</
td
>
</
tr
>
<
tr
>
<
td
>
4
</
td
>
<
td
>
ada
</
td
>
<
td
>
Adangme
</
td
>
<
td
>
5
</
td
>
</
tr
>
<
tr
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
</
tr
>
<
tr
>
<
td
>
482
</
td
>
<
td
>
znd
</
td
>
<
td
>
Zande
languages
</
td
>
<
td
>
483
</
td
>
</
tr
>
<
tr
>
<
td
>
483
</
td
>
<
td
>
zul
</
td
>
<
td
>
Zulu
</
td
>
<
td
>
484
</
td
>
</
tr
>
<
tr
>
<
td
>
484
</
td
>
<
td
>
zun
</
td
>
<
td
>
Zuni
</
td
>
<
td
>
485
</
td
>
</
tr
>
<
tr
>
<
td
>
485
</
td
>
<
td
>
zxx
</
td
>
<
td
>
No
linguistic
content
;
Not
applicable
</
td
>
<
td
>
486
</
td
>
</
tr
>
<
tr
>
<
td
>
486
</
td
>
<
td
>
zza
</
td
>
<
td
>
Zaza
;
Dimili
;
Dimli
;
Kirdki
;
Kirmanjki
;
Zazaki
</
td
>
<
td
>
487
</
td
>
</
tr
>
</
tbody
>
</
table
>
<
p
>
487
rows
×
3
columns
</
p
>
</
div
>
```
python
# Shorten the over-long code value 'qaa-qtz' to 'qaa'.
is_qaa_qtz = language['iso_code'] == 'qaa-qtz'
language.loc[is_qaa_qtz, 'iso_code'] = 'qaa'
```
```
python
# Add the UNKNOWN placeholder row (id 999999, iso_code '___').
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so the extra row is built as a one-row frame and concatenated instead.
# ignore_index=True renumbers the row index, as the original append did.
unknown_language = pd.DataFrame(
    [{'id': 999999, 'iso_code': '___', 'name': 'UNKNOWN'}]
)
language = pd.concat([language, unknown_language], ignore_index=True)
language
```
<
div
>
<
style
scoped
>
.
dataframe
tbody
tr
th
:
only
-
of
-
type
{
vertical
-
align
:
middle
;
}
.
dataframe
tbody
tr
th
{
vertical
-
align
:
top
;
}
.
dataframe
thead
th
{
text
-
align
:
right
;
}
</
style
>
<
table
border
=
"1"
class
=
"dataframe"
>
<
thead
>
<
tr
style
=
"text-align: right;"
>
<
th
></
th
>
<
th
>
iso_code
</
th
>
<
th
>
name
</
th
>
<
th
>
id
</
th
>
</
tr
>
</
thead
>
<
tbody
>
<
tr
>
<
td
>
0
</
td
>
<
td
>
aar
</
td
>
<
td
>
Afar
</
td
>
<
td
>
1
</
td
>
</
tr
>
<
tr
>
<
td
>
1
</
td
>
<
td
>
abk
</
td
>
<
td
>
Abkhazian
</
td
>
<
td
>
2
</
td
>
</
tr
>
<
tr
>
<
td
>
2
</
td
>
<
td
>
ace
</
td
>
<
td
>
Achinese
</
td
>
<
td
>
3
</
td
>
</
tr
>
<
tr
>
<
td
>
3
</
td
>
<
td
>
ach
</
td
>
<
td
>
Acoli
</
td
>
<
td
>
4
</
td
>
</
tr
>
<
tr
>
<
td
>
4
</
td
>
<
td
>
ada
</
td
>
<
td
>
Adangme
</
td
>
<
td
>
5
</
td
>
</
tr
>
<
tr
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
<
td
>...</
td
>
</
tr
>
<
tr
>
<
td
>
483
</
td
>
<
td
>
zul
</
td
>
<
td
>
Zulu
</
td
>
<
td
>
484
</
td
>
</
tr
>
<
tr
>
<
td
>
484
</
td
>
<
td
>
zun
</
td
>
<
td
>
Zuni
</
td
>
<
td
>
485
</
td
>
</
tr
>
<
tr
>
<
td
>
485
</
td
>
<
td
>
zxx
</
td
>
<
td
>
No
linguistic
content
;
Not
applicable
</
td
>
<
td
>
486
</
td
>
</
tr
>
<
tr
>
<
td
>
486
</
td
>
<
td
>
zza
</
td
>
<
td
>
Zaza
;
Dimili
;
Dimli
;
Kirdki
;
Kirmanjki
;
Zazaki
</
td
>
<
td
>
487
</
td
>
</
tr
>
<
tr
>
<
td
>
487
</
td
>
<
td
>
___
</
td
>
<
td
>
UNKNOWN
</
td
>
<
td
>
999999
</
td
>
</
tr
>
</
tbody
>
</
table
>
<
p
>
488
rows
×
3
columns
</
p
>
</
div
>
```
python
# Export JSON: serialise the table as a list of records, then write it
# pretty-printed (2-space indent) with non-ASCII characters left unescaped.
result = language.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/language.json', 'w', encoding='utf-8') as file:
    file.write(json.dumps(parsed, indent=2, ensure_ascii=False))
```
```
python
# Export the table as a tab-separated file in the working directory.
language.to_csv(
    'language.tsv',
    sep='\t',
    index=False,
    encoding='utf-8',
)
```
```
python
# Export the same tab-separated file into the sample/ folder.
language.to_csv(
    'sample/language.tsv',
    sep='\t',
    index=False,
    encoding='utf-8',
)
```
```
python
# Export the table as an Excel workbook into the sample/ folder,
# without writing the row index.
language.to_excel(
    'sample/language.xlsx',
    index=False,
)
```
Event Timeline
Log In to Comment