main.py
# score per question category
# entropy per question (if it's always the same, don't accept it? Problem with e.g. nationality)
import os
import load_files as lf
import model as mo
import json
import copy
import pickle
import matplotlib.pyplot as plt
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
MULTIMODEL = False
# choose the model type
MODEL = "gpt" # gpt, bert
MODEL2 = "bert" # gpt, bert
MODEL3 = "bert" # gpt, bert
# train the model from scratch or from a checkpoint, not supported anymore
SCRATCH = False # True, False
# choose the checkpoint
CHECKPOINT = "gpt2" # xlm-roberta-base, gpt2, None, roberta-base, "gpt2-medium", "gpt2-large", "gpt2-xl"
CHECKPOINT2 = "roberta-base"
CHECKPOINT3 = "xlm-roberta-base"
# train the model in this run
TRAIN = False # True, False
# number of training epochs
EPOCHS = 1
# new model name
NAME = "xlm_e1_CHpart" # "gpt_e1_CHpart" # "xlm_e1_CHpart" # "roberta_e1_CHpart" # xmlr_e_10_test, gpt_e_1_test
NAME2 = "roberta_e1_CHpart"
NAME3 = "xlm_e1_CHpart"
# probability Modes to be tested
PROBA = ["forceNon0"]#, "longOk", ["mult", "forceNon0", "maxNon0"], ["mult", "longOk"]
PROBA2 = ["forceNon0"]
PROBA3 = ["forceNon0"]
# number of times the model should be retrained (0 is never)
RETRAIN = 3
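# Rough flow of the script: instantiate the dataset/model wrappers for the chosen MODEL type,
# load the selected test data, optionally train (TRAIN=True), load the model by NAME, and
# finally call generate(), which extracts answers, plots the results and, if RETRAIN > 0,
# iteratively finetunes the model on its own sure guesses via retrain().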
home_path = os.getcwd()
# load a dataset
if MODEL == "gpt":
print("Start instances for a gpt2 model")
dataset = lf.DatasetGPT(home_path)
model = mo.GPTModel(home_path, printStep=-1)
elif MODEL == "bert":
print("Start instances for a bert model")
dataset = lf.DatasetBert(home_path)
model = mo.BertModel(home_path, printStep=-1)
else:
NotImplementedError("Model type not defined")
# load additional models, if ensemble learning is enabled
if MULTIMODEL:
if MODEL2 == "bert":
print("Start instances for a bert model")
dataset2 = lf.DatasetBert(home_path)
model2 = mo.BertModel(home_path, printStep=-1)
if MODEL3 == "bert":
print("Start instances for a bert model")
dataset3 = lf.DatasetBert(home_path)
model3 = mo.BertModel(home_path, printStep=-1)
combo_model = mo.ComboModel(home_path, printStep=-1,
models = [model, model2, model3],
ModelNames = [NAME, NAME2, NAME3],
probaModes = [PROBA[0], PROBA2[0], PROBA3[0]]
)
# load a dataset
# dataset.load_data()
dataset.load_data(dir="CH_part", end="", testset=1)
# train a tokenizer from scratch, not supported anymore
if SCRATCH:
NotImplementedError("Train a tokenizer")
SCRATCH = "loc of tok"
else:
SCRATCH = None
# train the model
if TRAIN:
model.train(nbEpochs=EPOCHS,
outModelName=NAME,
startCheckpoint=CHECKPOINT,
dataEnd="",
tokenizerLocaction=SCRATCH)
# load the model
model.load_model(NAME)
if MULTIMODEL:
# load all the models needed for ensemble learning
combo_model.load_model("***")
all_scores = []
def retrain(model, dataset, NAME, CHECKPOINT, EPOCHS, nb_used=100, end="", onlySave=True, console="",
dir2=None, testset=None, dataset_bert=None):
"""
Finetune a given model
:param model: Model, from the parent class Model
:param dataset: Dataset, from the parent class Dataset
:param NAME: The filename of the saved dataset containing sure guesses and unused samples
:param CHECKPOINT: folder from where to load the model/dataset
:param EPOCHS: number of training epochs
:param nb_used: number of used samples for training/evaluation
:param end: file name extension
:param onlySave: only save the results, but don't retrain the model
:param console: string where the console output is saved
:param dir2: directory containing a dataset from bert
:param testset: integer between 1 and 4, specifying which one of the test datasets is used
:param dataset_bert: dataset used for the bert models
:return:
model: finetuned model,
dataset: the new used testset,
NAME: new name of the model
CHECKPOINT: new directory for the saved files
"""
xsure, ysure = model.getSureGuesses()
xtest, ytest = dataset.get_test()
# replace everything with the wrong answer, to check whether finetuning still improves performance
if False:
for i, _ in enumerate(ysure):
ysure[i] = "<unk>"
# check if any examples have been found above the limit confidence score
# otherwise finetuning can not be performed
if len(xsure) == 0:
print("{}\n{}\n{}\n{}\n{}".format("="*50, "="*50, "there are no sure guesses...", "="*50, "="*50))
return model, dataset, NAME, CHECKPOINT
# increase the length of the finetuning set by duplicating it
# if the finetuning dataset is too small, the network is unable to finetune and will produce an error message
while len(xsure) < 100:
xsure = xsure + xsure
ysure = ysure + ysure
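# e.g. 30 sure guesses are duplicated to 60 and then to 120 examples, stopping once the count is >= 100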
# save the remaining part of the dataset that is untouched and the sure guesses
dataset.save_data(xsure,
ysure,
NAME,
x_test=xtest[nb_used:],
y_test=ytest[nb_used:],
console=console,
)
# finetune the model
if not onlySave:
CHECKPOINT = NAME
# change the name of the new model
NAME += "_adapt_" + end
# load the dataset
dataset.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
# load the dataset for the bert model (only used in ensemble learning)
dataset_bert.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
# attach the file ending if there is any
if end != "":
end = "_" + end
# finetune the model
model.train(nbEpochs=EPOCHS,
outModelName=NAME,
startCheckpoint=CHECKPOINT,
tokenizerLocaction=CHECKPOINT,
dataEnd="",
)
# load the new model
model.load_model(NAME)
return model, dataset, NAME, CHECKPOINT
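# Illustrative call (a sketch only; it mirrors how retrain() is invoked from generate() below,
# the argument values here are examples):
# model, dataset, NAME_r, CHECKPOINT = retrain(model, dataset, NAME_r, CHECKPOINT, EPOCHS,
#                                              nb_used=1000, end="", onlySave=False,
#                                              console=console, dir2="CH", testset=2)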
def K_alpha(tp, tn, fp, fn, alpha):
"""
Calculate the W_alpha score (it is 1 iff all examples are true positives)
:param tp: list, amount of true positive examples for each threshold limit
:param tn: list, amount of true negative examples for each threshold limit
:param fp: list, amount of false positive examples for each threshold limit
:param fn: list, amount of false negative examples for each threshold limit
:param alpha: punishment factor for false positive examples
:return:
K: list of W_alpha scores (one for each threshold limit)
"""
K = []
for i, _ in enumerate(tp):
K.append(tp[i]/(tp[i]+fp[i]*alpha+tn[i]+fn[i]))
return K
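# K_alpha computes W_alpha = tp / (tp + alpha*fp + tn + fn) for every threshold bucket.
# Worked example (illustrative numbers): tp=[80], tn=[5], fp=[10], fn=[5]
#   alpha=1  -> 80 / (80 + 10 + 5 + 5)  = 0.80
#   alpha=10 -> 80 / (80 + 100 + 5 + 5) ~ 0.42
# i.e. a larger alpha punishes false positives more strongly.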
def generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end = "", nb_used=1000,
dir2=None, testset=None, dataset_bert = None):
"""
Extract answers from the test set and summarize the results in diagrams
:param model: the used model
:param dataset: the used dataset
:param NAME: name of the model
:param RETRAIN: number of finetuning iterations
:param CHECKPOINT: directory from where to load the model/dataset
:param EPOCHS: number of epochs used during finetuning
:param all_scores: list containing a summary of results from previous generations
:param end: file extension
:param nb_used: number of samples used from the test set
(usually not chosen above 1000, otherwise the execution takes a long time)
:param dir2: directory for the bert model, only used in ensemble learning
:param testset: integer usually between 1 and 4, specifying which test set is used (there are multiple)
:param dataset_bert: dataset of bert, only used in ensemble learning
:return:
all_scores: list containing a summary of results, updated with current results
"""
# color used for different label categories in the plots
legend_color = {
"all": "black",
"sex": "goldenrod",
"height": "red",
"dateOfBirth": "plum",
"dateOfExpiry": "fuchsia",
"dateOfIssue": "deeppink",
"placeOfBirth": "darkgreen",
"surname": "lawngreen",
"givenName": "mediumseagreen",
"placeOfOrigin": "darkslategrey",
"identityCard": "darkred",
"nationality": "orange",
"eyeColor": "blue",
"fake_height": "black",
}
# markers used for different label categories in the plots
legend_marker = {
"all": "o",
"sex": "v",
"height": "^",
"dateOfBirth": "<",
"dateOfExpiry": ">",
"dateOfIssue": "*",
"placeOfBirth": "x",
"surname": "D",
"givenName": "o",
"placeOfOrigin": "v",
"identityCard": "^",
"nationality": "<",
"eyeColor": ">",
"fake_height": "*",
}
# legend name used for different label categories in the plots
legend_name = {
"all": "all",
"sex": "sex",
"height": "height",
"dateOfBirth": "date of birth",
"dateOfExpiry": "date of expiry",
"dateOfIssue": "date of issue",
"placeOfBirth": "place of birth",
"surname": "last name",
"givenName": "first name",
"placeOfOrigin": "place of origin",
"identityCard": "card number",
"nationality": "nationality",
"eyeColor": "eye color",
"fake_height": "fake_height",
}
# extract the information using the selected token-selection mechanisms (probability modes)
for m in PROBA:
# load the correct model
model.load_model(NAME)
# update the probability mode
model.set_proba_mode(m)
# save the name of the used model
NAME_r = NAME
# repeat for the specified number of fine-tuning steps
for r in range(RETRAIN + 1):
# extract the information from the specified test set
# set to False if only the plots should be generated from previously saved extractions
if True:
# extract the answers
scores_dict, scores, console = model.generate(dataset.get_test(), 0, nb_used, data_bert = dataset_bert.get_test())
# update list of summarized results
all_scores.append(scores)
# save detailed results, such that figures can be created/modified at a later point
os.chdir(home_path)
with open("scores_dict_{}_{}_{}".format(r, dir2, MODEL) + ".p", "wb") as f:
pickle.dump([scores_dict, console], f)
# just generate figures
else:
# load results from previous extractions
os.chdir(home_path)
with open("scores_dict_{}_{}_{}".format(r, dir2, MODEL) + ".p", 'rb') as fp:
preds = pickle.load(fp)
scores_dict = preds[0]
console = preds[1]
# print all scores
# show the number of true positives, false positives, true negatives and false negatives
keys = scores_dict.keys()
buckets = len(scores_dict["all"]["count"])
lim = [x/buckets for x in range(buckets)]
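# e.g. with buckets == 10 this yields lim = [0.0, 0.1, ..., 0.9], one threshold per bucket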
for key in keys:
# old plots: results presented as line plots
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["tp"], 'g+-', label="tp")
plt.plot(lim, scores_dict[key]["tn"], 'go--', label="tn")
plt.plot(lim, scores_dict[key]["fp"], 'r+-', label="fp")
plt.plot(lim, scores_dict[key]["fn"], 'ro--', label="fn")
tmp = [120, 500, 1200, 2000, 5000]
for upper in tmp:
if scores_dict[key]["tp"][0]+scores_dict[key]["fp"][0] < upper:
break
plt.ylim([0, upper])
plt.xlabel("network confidence score")
plt.ylabel("# examples")
plt.legend()
os.chdir(home_path)
plt.savefig("f_tp_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_tp_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# present results as a barplot
else:
# set the numbers in the bars manually as text labels
# (allows ignoring 0 values, and using a small font for small values)
manualNumberLabels = True
def putNumber(X, Y, bottom, ax, upper, pos=0, color="black"):
"""
Put a text label in the plot with the numeric value of the plotted bar
:param X: list of the x values
:param Y: list of the y values
:param bottom: lower end of the plotted bar (list)
:param ax: the figure that should be edited
:param upper: upper limit of the y axis
:param pos: how much the number should be shifted to the left/right
:param color: color of the font
:return:
ax: return the updated figure
"""
# get spacing of the bars on the x-axis
b = X[1]-X[0]
# set offset to center based on the y scale
if upper > 1000:
off = [15, 10]
else:
off = [2, 1]
# set a numeric label for all bars
for i, x in enumerate(X):
# if the value is larger than 3% of the maximum representable value, use the standard format
# (assuming the bar is big enough to contain the entire label)
if Y[i] > 0.03*upper:
ax.text(x-b/4, Y[i]/2+bottom[i]-off[0], str(int(Y[i])), color="black", fontsize=10, bbox={'edgecolor': "white", 'facecolor': 'white', 'alpha': 0, 'pad': 2})
# if the value is greater than 0 but the bar is small, use a smaller font and
# allow small horizontal displacements to avoid overlap
elif Y[i] > 0:
ax.text(x-b/4+b/4*pos, Y[i] / 2 + bottom[i]-off[1], str(int(Y[i])), color="black", fontsize=6,
bbox={'edgecolor': "white", 'facecolor': 'white', 'alpha': 0, 'pad': 2})
return ax
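# putNumber is applied below to each stacked layer (tp, tn, fn, fp), passing the running
# "bottom" so that every label is centered inside its own bar segment.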
width = 0.08
fig, ax = plt.subplots()
#115, 150
tmp = [150, 500, 1150, 2000, 5000]
for upper in tmp:
if scores_dict[key]["tp"][0] + scores_dict[key]["fp"][0] < upper:
break
plt.ylim([0, upper])
tmp = copy.deepcopy(scores_dict[key]["tp"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p1 = ax.bar(lim, tmp , width, label='tp', color="lime", hatch='/', edgecolor="limegreen")
if manualNumberLabels:
ax = putNumber(lim, tmp, [0 for _ in lim], ax, upper, pos = 1.5, color="darkslategray")
bottom = copy.deepcopy(tmp)
tmp = copy.deepcopy(scores_dict[key]["tn"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p2 = ax.bar(lim, tmp, width,
bottom=bottom, label='tn', color="orange", hatch='-', edgecolor="darkorange")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos = -0.25, color="saddlebrown")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
tmp = copy.deepcopy(scores_dict[key]["fn"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p3 = ax.bar(lim, tmp, width,
bottom=bottom, label='fn', color="darkgreen", hatch='x', edgecolor="green")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos=0.625, color="darkolivegreen")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
tmp = copy.deepcopy(scores_dict[key]["fp"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p4 = ax.bar(lim, tmp, width,
bottom=bottom, label='fp', color="red", hatch='.', edgecolor="tomato")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos = 1.5, color="maroon")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
# ax.axhline(0, color='grey', linewidth=0.8)
ax.set_ylabel('# ID cards per category')
ax.set_xlabel('threshold for confidence score $c$')
ax.set_title(legend_name[key])
ax.set_xticks(lim)
lim_str = []
for l in lim:
lim_str.append(str(l))
ax.set_xticklabels(lim_str)
ax.legend()
# Label with label_type 'center' instead of the default 'edge'
for p in [p1, p2, p3, p4]:
if not manualNumberLabels:
ax.bar_label(p, label_type='center')
# ax.bar_label(p2)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=4, fancybox=False, shadow=False)
#plt.legend(loc='center right')
os.chdir(home_path)
plt.savefig("f_tp_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_tp_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ---------------
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["f1"], 'go-', label="F1")
plt.plot(lim, scores_dict[key]["recall"], 'bx:', label="recall")
plt.plot(lim, scores_dict[key]["precision"], 'md--', label="precision")
plt.ylim([0,1])
plt.xlabel("network confidence score")
plt.ylabel("score")
plt.legend()
os.chdir(home_path)
plt.savefig("f_sc_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_sc_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ----------------
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["count"], "ko-")
tmp = [150, 500, 1000, 2000, 5000]
for upper in tmp:
if scores_dict[key]["count"][0] < upper:
break
plt.ylim([0, upper])
plt.xlabel("network confidence score")
plt.ylabel("# examples")
os.chdir(home_path)
plt.savefig("f_conf_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_conf_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ----------------
if False:
plt.title(legend_name[key])
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
1),
'co-', label="$\\alpha=1$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
2),
'cx:', label="$\\alpha=2$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
10),
'cd--', label="$\\alpha=10$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
100),
'c*', label = "$\\alpha=100$")
plt.ylim([0, 1])
plt.xlabel("network confidence score")
plt.ylabel("$W_{\\alpha}$")
plt.legend()
os.chdir(home_path)
plt.savefig("f_k_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_k_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ---------------------------------------------
plt.title("summary for all keys")
for key in keys:
try:
c = legend_color[key]
n = legend_name[key]
m = legend_marker[key]
except KeyError:
c = "yellow"
n = "unk"
m = "x"
plt.plot(lim, scores_dict[key]["f1"], label=n, color=c, marker=m)
plt.xlabel("threshold for confidence score $c$")
plt.ylabel("f1 score")
plt.ylim([-0.05,1.4])
# plt.legend()
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=3, fancybox=False, shadow=False)
os.chdir(home_path)
plt.savefig("f_f1_{}.eps".format(r), format="eps")
plt.savefig("f_f1_{}.jpg".format(r), format="jpg")
plt.close()
# ---------------------------------------------
for alpha in [1, 2, 10, 100]:
plt.title("summary for all keys")
for key in keys:
try:
c = legend_color[key]
n = legend_name[key]
m = legend_marker[key]
except KeyError:
c = "yellow"
n = "unk"
m = "x"
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
alpha), label=n, color=c, marker=m, linestyle="--")
plt.xlabel("threshold for confidence score $c$")
plt.ylabel("$W_{\\alpha}$ score ($\\alpha=$" + str(alpha) + ")")
plt.ylim([-0.05, 1.4])
# plt.legend()
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=3, fancybox=False, shadow=False)
os.chdir(home_path)
plt.savefig("f_k_{}_{}.eps".format(alpha, r), format="eps")
plt.savefig("f_k_{}_{}.jpg".format(alpha, r), format="jpg")
plt.close()
with open("all_scores" + ".json", "w") as f:
json.dump(all_scores, f)
# don't do it the last time (saves time, and there is no use in training once more)
if r < RETRAIN:
onlySave = False
else:
onlySave = True
testset += 1
model, dataset, NAME_r, CHECKPOINT = retrain(model,
dataset,
NAME_r,
CHECKPOINT,
EPOCHS,
end=end,
onlySave=onlySave,
console=console,
nb_used=nb_used,
dir2=dir2,
testset=testset,
dataset_bert=dataset_bert)
return all_scores
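# The blocks below are manual experiment toggles: only the blocks set to True are executed,
# each one loading a different dataset directory and passing it through generate().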
if False:
print("{} Test on part CH dataset (test) {}".format("="*100, "="*100))
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)
if False:
print("{} Test on new CH mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH",
testset=1)
if False:
print("{} Test on new CH1 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH1", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH1",
testset=1)
if False:
print("{} Test on new CH10 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH10", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH10",
testset=1)
if False:
print("{} Test on new CH50 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH50", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
testset=1)
if False:
print("{} Test on new FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="FI", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
testset=1)
if False:
print("{} Test on new FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DE", end="", testset=1)
dataset2.load_data(dir="DE", end="", testset=1)
all_scores = generate(combo_model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DE",
testset=1, dataset_bert=dataset2)
if True:
print("{} Test on new DE mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="FI", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
testset=1, dataset_bert=dataset)
if False:
print("{} Test on new DEL label mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DEL", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEL",
testset=1)
if False:
print("{} Test on new DEN number mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DEN", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEN",
testset=1)
if False:
print("{} Test on new DED dcoulbe mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DED", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DED",
testset=1)
if False:
print("{} Test on new FR mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH50", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
testset=1)
if False:
print("{} Test on new IT mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="IT", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="IT",
testset=1)
if False:
print("{} Test on FR mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fr_mixed_full", end="_fr")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")
if False:
print("{} Test on FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fi_mixed_full", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")
if False:
print("{} Test on CH dataset {}".format("="*100, "="*100))
dataset.load_data(dir="ch_full")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)
if False:
print("{} Test on FR dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fr_full_surname", end="_fr")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")
if False:
print("{} Test on FI dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fi_full_surname", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")
if False:
print("{} Test on FI dataset with Finnish names {}".format("="*100, "="*100))
dataset.load_data(dir="fi_full_surname_fi_names", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi2")
print("{}\nSummary\n{}".format("="*100, "="*100))
for item in all_scores:
print(item)
