Page MenuHomec4science

gen_it.py
No OneTemporary

File Metadata

Created
Mon, Oct 7, 13:21

gen_it.py

import copy
import json
import random

from gen_master import *
class GenDataIT(GenData):
    """Generator of synthetic Italian identity-card fields and contexts.

    Builds on ``GenData`` (brought in by ``from gen_master import *``) and
    adds the Italian label schema, loaders for names and cities, and the
    random answers used to assemble a "label: value; " context string.

    NOTE(review): ``self.fake_data``, ``self._gen_date`` and ``self._strnum``
    are used below but not defined in this file; they are presumably provided
    by ``GenData`` -- confirm against ``gen_master``.
    """

    def __init__(self, home_path):
        """Initialise the base generator, then set the Italian keys/labels.

        Args:
            home_path: Passed unchanged to ``GenData.__init__``.
        """
        super().__init__(home_path)
        self.abrev = "it"
        # Keys that _fill_schema draws from ("placeOfBirth" is folded into
        # the combined "dateOfBirth" label there).
        self.keys = [
            "surname", "givenName", "dateOfBirth", "height", "sex",
            "dateOfIssue", "dateOfExpiry", "nationality",
            "identityCard", "placeOfBirth",
        ]
        # Every key that has a label in self.schema.
        self.keys_all = [
            "surname", "givenName", "dateOfBirth", "height", "sex",
            "authority", "dateOfIssue", "dateOfExpiry", "nationality",
            "identityCard", "signature", "parents", "fiscalCode",
            "birthCode", "address",
        ]
        # Printed label for each key, as written into the generated context.
        self.schema = {
            "surname": "COGNOME - SURNAME",
            "givenName": "NOME - NAME",
            "dateOfBirth": "LUOGO E DATA DI NASCITA - PLACE AND DATE OF BIRTH",
            "height": "STATURA - HEIGHT",
            "sex": "SESSO - SEX",
            "authority": "COMUNE DI - MUNICIPALITY",
            "dateOfIssue": "EMISSIONE - ISSUING",
            "dateOfExpiry": "SCADENZA - EXPIRY",
            "nationality": "CITTADINANZA - NATIONALITY",
            "identityCard": "CARTA DI IDENTITA - IDENTITY CARD",
            "signature": "FIRMA DEL TITOLARE - HOLDER'S SIGNATURE",
            # NOTE(review): "DE GENITORI" looks like a typo for "DEI GENITORI";
            # left unchanged because the label is emitted at runtime.
            "parents": "COGNOME E NOME DE GENITORI O DI CHI NE FA LE VECI - SURNAME AND NAME OF PARENTS OR LEGAL GUARDIAN",
            "fiscalCode": "CODICE FISCALE - FISCAL CODE",
            # NOTE(review): "ESTREMAKATTO" is presumably a typo for
            # "ESTREMI ATTO"; left unchanged -- confirm before editing.
            "birthCode": "ESTREMAKATTO DI NASCITA",
            "address": "INDIRIZZO DI RESIDENZA - RESIDENCE",
        }

    # protected functions
    def _get_firstName(self):
        """Return the first given name of every "Surname, Given" entry.

        Reads the ``"name"`` list of ``data_italy.json`` (path relative to the
        current working directory). Entries without ``", "`` are skipped, and
        only the first space-separated word of the given-name part is kept
        (some entries hold several names).

        Side effect: reseeds the module-level ``random`` generator with 42.
        """
        with open('data_italy.json', 'r', encoding='utf-8') as fp:
            data = json.load(fp)
        # Kept from the original: resets the shared RNG so that whatever is
        # drawn after loading is reproducible.
        random.seed(42)
        given_parts = [x.split(', ', 2)[1] for x in data["name"] if ", " in x]
        return [part.split(' ')[0] for part in given_parts]

    def _get_lastName(self):
        """Return the surname part of every "Surname, Given" entry.

        Reads the ``"name"`` list of ``data_italy.json`` and applies the same
        ``", "`` filter as ``_get_firstName``, so both lists line up index by
        index.

        Side effect: reseeds the module-level ``random`` generator with 42.
        """
        with open('data_italy.json', 'r', encoding='utf-8') as fp:
            data = json.load(fp)
        random.seed(42)
        return [x.split(', ', 2)[0] for x in data["name"] if ", " in x]

    def _get_city(self):
        """Return the ``"region_it"`` entry of ``data_geneva_city.json``."""
        with open('data_geneva_city.json', 'r', encoding='utf-8') as fp:
            data = json.load(fp)
        return data['region_it']

    def _gen_date_(self, d, m, y):
        """Format day, month and year as "d.m.y" using ``self._strnum``."""
        return ".".join((self._strnum(d), self._strnum(m), self._strnum(y)))

    def _gen_identityCard(self):
        """Return a random 9-character identity-card number.

        Characters are drawn uniformly from an alphabet in which the digits
        are repeated five times, so each digit is five times as likely as
        each letter.
        """
        abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890123456789012345678901234567890123456789"
        return "".join(abc[random.randrange(len(abc))] for _ in range(9))

    def _get_answer(self, key):
        """Return the candidate answers for ``key``.

        Every key yields one sequence of candidates, except ``"dateOfBirth"``
        which yields a ``(places, dates)`` pair because the card prints place
        and date of birth under one label.

        Raises:
            NotImplementedError: If ``key`` has no generator here.
        """
        # is not yet self-consistent: age-height; dateOfIssue-dateOfExpiry
        if key == "surname":
            return self.fake_data["last name"]
        elif key == "givenName":
            return self.fake_data["first name"]
        elif key == "dateOfBirth":
            return self.fake_data["city"], self._gen_date(1940, 2020)
        elif key == "height":
            return [random.randint(100, 180)]
        elif key == "dateOfIssue":
            return self._gen_date(2010, 2020)
        elif key == "dateOfExpiry":
            return self._gen_date(2020, 2030)
        elif key == "sex":
            return ["M", "F"]
        elif key == "identityCard":
            return [self._gen_identityCard()]
        elif key == "nationality":
            return ["ITA"]
        else:
            raise NotImplementedError("key not known")

    def _fill_schema(self, question, nb_keys="all"):
        """Build a random "label: value; " context that answers ``question``.

        Args:
            question: Key being asked about. ``"placeOfBirth"`` is answered
                through the combined ``"dateOfBirth"`` label.
            nb_keys: ``"all"`` to use every key, or the number of randomly
                chosen keys to keep.

        Returns:
            Tuple ``(context, question, answer)``.

        Raises:
            ValueError: If the selection is empty and the needed key can
                therefore not be inserted.
        """
        def pick(seq):
            # randrange(len(seq)) never returns len(seq); the original
            # randint(0, len(seq)) could, and then raised IndexError.
            return seq[random.randrange(len(seq))]

        # special case of italy, because place of birth and date of birth
        # are a combined label
        selected = list(self.keys)
        selected.remove("placeOfBirth")
        if nb_keys != "all":
            random.shuffle(selected)
            selected = selected[:nb_keys]

        # Label that must be present for the question to be answerable.
        needed = "dateOfBirth" if question == "placeOfBirth" else question
        if needed not in selected:
            if not selected:
                raise ValueError("nb_keys must select at least one key")
            # replace a random entry
            selected[random.randrange(len(selected))] = needed

        pieces = []
        answer = ""
        for key in selected:
            if key == "dateOfBirth":
                places, dates = self._get_answer(key)
                place = pick(places)
                date = pick(dates)
                ans = str(place) + " " + str(date)
                if question == "dateOfBirth":
                    answer = date
                elif question == "placeOfBirth":
                    answer = place
            else:
                ans = pick(self._get_answer(key))
                if key == question:
                    answer = ans
            pieces.append(self.schema[key] + ": " + str(ans) + "; ")
        return "".join(pieces), question, answer

Event Timeline