Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F86611170
gen_it.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Oct 7, 13:21
Size
5 KB
Mime Type
text/x-python
Expires
Wed, Oct 9, 13:21 (2 d)
Engine
blob
Format
Raw Data
Handle
21453994
Attached To
R11149 PDM-Nicola-Oulu
gen_it.py
View Options
import random
from gen_master import *
import copy
import json
class GenDataIT(GenData):
    """Generate synthetic context/question/answer samples for the Italian ID card.

    Extends ``GenData`` (from ``gen_master``) with the field keys, the printed
    bilingual labels and the value generators specific to the Italian
    identity card.

    ``self.fake_data``, ``self._gen_date`` and ``self._strnum`` are used here
    but not defined in this class; they are expected to come from ``GenData``.
    """

    def __init__(self, home_path):
        """Initialise the base generator and declare the Italian card layout.

        Args:
            home_path: Forwarded unchanged to ``GenData.__init__``.
        """
        super().__init__(home_path)
        self.abrev = "it"
        # Keys that can be asked about / rendered by ``_fill_schema``.
        self.keys = [
            "surname",
            "givenName",
            "dateOfBirth",
            "height",
            "sex",
            "dateOfIssue",
            "dateOfExpiry",
            "nationality",
            "identityCard",
            "placeOfBirth",
        ]
        # Every key that has a label in ``self.schema``.
        self.keys_all = [
            "surname",
            "givenName",
            "dateOfBirth",
            "height",
            "sex",
            "authority",
            "dateOfIssue",
            "dateOfExpiry",
            "nationality",
            "identityCard",
            "signature",
            "parents",
            "fiscalCode",
            "birthCode",
            "address",
        ]
        # Label text emitted in the generated context for each key.
        # Place and date of birth share a single label, which is why
        # "placeOfBirth" has no entry of its own.
        self.schema = {
            "surname": "COGNOME - SURNAME",
            "givenName": "NOME - NAME",
            "dateOfBirth": "LUOGO E DATA DI NASCITA - PLACE AND DATE OF BIRTH",
            "height": "STATURA - HEIGHT",
            "sex": "SESSO - SEX",
            "authority": "COMUNE DI - MUNICIPALITY",
            "dateOfIssue": "EMISSIONE - ISSUING",
            "dateOfExpiry": "SCADENZA - EXPIRY",
            "nationality": "CITTADINANZA - NATIONALITY",
            "identityCard": "CARTA DI IDENTITA - IDENTITY CARD",
            "signature": "FIRMA DEL TITOLARE - HOLDER'S SIGNATURE",
            # NOTE(review): "DE GENITORI" looks like a typo for "DEI GENITORI";
            # left untouched because the label is runtime data - confirm.
            "parents": "COGNOME E NOME DE GENITORI O DI CHI NE FA LE VECI - SURNAME AND NAME OF PARENTS OR LEGAL GUARDIAN",
            "fiscalCode": "CODICE FISCALE - FISCAL CODE",
            # NOTE(review): possibly meant "ESTREMI ATTO DI NASCITA" and has no
            # English half, unlike the other labels - confirm before changing.
            "birthCode": "ESTREMAKATTO DI NASCITA",
            "address": "INDIRIZZO DI RESIDENZA - RESIDENCE",
        }

    # protected functions
    def _get_firstName(self):
        """Return the first given name of every usable entry in 'data_italy.json'.

        Only entries of ``data["name"]`` that contain ``", "`` are used.  The
        text after the first ``", "`` is taken and cut at the first space, so
        only one given name is kept (some entries hold several, e.g.
        "Bob and Anna").

        Side effect: re-seeds the module-level ``random`` generator with 42
        (kept from the original; presumably for reproducible generation).
        """
        with open('data_italy.json', 'r') as fp:
            data = json.load(fp)
        random.seed(42)
        return [
            entry.split(', ', 2)[1].split(' ')[0]
            for entry in data["name"]
            if ", " in entry
        ]

    def _get_lastName(self):
        """Return the surname part of every usable entry in 'data_italy.json'.

        Only entries of ``data["name"]`` that contain ``", "`` are used; the
        surname is the text before the first ``", "``.

        Side effect: re-seeds the module-level ``random`` generator with 42
        (kept from the original; presumably for reproducible generation).
        """
        with open('data_italy.json', 'r') as fp:
            data = json.load(fp)
        random.seed(42)
        return [
            entry.split(', ', 2)[0]
            for entry in data["name"]
            if ", " in entry
        ]

    def _get_city(self):
        """Return the ``'region_it'`` entry of 'data_geneva_city.json'."""
        with open('data_geneva_city.json', 'r') as fp:
            data = json.load(fp)
        return data['region_it']

    def _gen_date_(self, d, m, y):
        """Format day, month and year as ``"d.m.y"``.

        Each component is rendered with ``self._strnum`` (not defined in this
        class) and the three results are joined with dots.
        """
        return ".".join(self._strnum(part) for part in (d, m, y))

    def _gen_identityCard(self):
        """Return a random 9-character card number made of A-Z and 0-9.

        The alphabet lists the ten digits five times, so any given digit is
        five times as likely to be drawn as any given letter.
        """
        abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123456789012345678901234567890123456789"
        return "".join(abc[random.randint(0, len(abc) - 1)] for _ in range(9))

    def _get_answer(self, key):
        """Return the candidate values for ``key``.

        For every key except "dateOfBirth" the result is an indexable
        collection from which the caller draws one element.  For
        "dateOfBirth" the result is a 2-tuple ``(places, dates)`` because the
        card prints place and date of birth under one label.

        Raises:
            NotImplementedError: If ``key`` has no generator.
        """
        # is not yet selfconsistent : age-height; dateofIssue-dateof Expiery
        if key == "surname":
            return self.fake_data["last name"]
        if key == "givenName":
            return self.fake_data["first name"]
        if key == "dateOfBirth":
            return self.fake_data["city"], self._gen_date(1940, 2020)
        if key == "height":
            return [random.randint(100, 180)]
        if key == "dateOfIssue":
            return self._gen_date(2010, 2020)
        if key == "dateOfExpiry":
            return self._gen_date(2020, 2030)
        if key == "sex":
            return ["M", "F"]
        if key == "identityCard":
            return [self._gen_identityCard()]
        if key == "nationality":
            return ["ITA"]
        raise NotImplementedError("key not known")

    def _fill_schema(self, question, nb_keys="all"):
        """Build one ``(context, question, answer)`` sample.

        Args:
            question: Key whose value is the expected answer.  "placeOfBirth"
                is answered from the combined "dateOfBirth" entry.
            nb_keys: ``"all"`` to render every key, otherwise the number of
                keys kept after shuffling.

        Returns:
            A tuple ``(context, question, answer)`` where ``context`` is the
            concatenation of ``"<label>: <value>; "`` for each rendered key
            and ``answer`` is the value drawn for ``question``.
        """
        def pick(candidates):
            # random.randint includes both endpoints, hence the "- 1".
            return candidates[random.randint(0, len(candidates) - 1)]

        # special case of italy, because place of birth and date of birth are a combined label
        selected = list(self.keys)
        selected.remove("placeOfBirth")

        if nb_keys != "all":
            random.shuffle(selected)
            selected = selected[:nb_keys]

        # The key that must be rendered so that the question can be answered.
        required = "dateOfBirth" if question == "placeOfBirth" else question
        if required not in selected:
            if selected:
                # replace a random entry
                selected[random.randint(0, len(selected) - 1)] = required
            else:
                # Nothing left to replace (e.g. nb_keys == 0): render at
                # least the key the question needs instead of crashing.
                selected.append(required)

        parts = []
        answer = ""
        for key in selected:
            if key == "dateOfBirth":
                places, dates = self._get_answer(key)
                place = pick(places)
                date = pick(dates)
                value = str(place) + " " + str(date)
                if question == "dateOfBirth":
                    answer = date
                elif question == "placeOfBirth":
                    answer = place
            else:
                value = pick(self._get_answer(key))
                if key == question:
                    answer = value
            parts.append(self.schema[key] + ": " + str(value) + "; ")

        return "".join(parts), question, answer
Event Timeline
Log In to Comment