main.py
# score per question category
# entropy per question (if it's always the same, don't accept it? Problem with e.g. nationality)
import os
import load_files as lf
import model as mo
import json
import copy
import pickle
import matplotlib.pyplot as plt
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
MULTIMODEL = False
# choose the model type
MODEL = "gpt" # gpt, bert
MODEL2 = "bert" # gpt, bert
MODEL3 = "bert" # gpt, bert
# train the model from scratch or from a checkpoint, not supported anymore
SCRATCH = False # True, False
# choose the checkpoint
CHECKPOINT = "gpt2" # xlm-roberta-base, gpt2, None, roberta-base, "gpt2-medium", "gpt2-large", "gpt2-xl"
CHECKPOINT2 = "roberta-base"
CHECKPOINT3 = "xlm-roberta-base"
# train the model in this run
TRAIN = False # True, False
# number of training epochs
EPOCHS = 1
# new model name
NAME = "xlm_e1_CHpart" # "gpt_e1_CHpart" # "xlm_e1_CHpart" # "roberta_e1_CHpart" # xmlr_e_10_test, gpt_e_1_test
NAME2 = "roberta_e1_CHpart"
NAME3 = "xlm_e1_CHpart"
# probability Modes to be tested
PROBA = ["forceNon0"]#, "longOk", ["mult", "forceNon0", "maxNon0"], ["mult", "longOk"]
PROBA2 = ["forceNon0"]
PROBA3 = ["forceNon0"]
# number of times the model should be retrained (0 is never)
RETRAIN = 3
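# Rough flow of the script: instantiate the dataset/model wrappers for the chosen MODEL type,
# load the selected test data, optionally train (TRAIN=True), load the model by NAME, and
# finally call generate(), which extracts answers, plots the results and, if RETRAIN > 0,
# iteratively finetunes the model on its own sure guesses via retrain().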
home_path = os.getcwd()
# load a dataset
if MODEL == "gpt":
print("Start instances for a gpt2 model")
dataset = lf.DatasetGPT(home_path)
model = mo.GPTModel(home_path, printStep=-1)
elif MODEL == "bert":
print("Start instances for a bert model")
dataset = lf.DatasetBert(home_path)
model = mo.BertModel(home_path, printStep=-1)
else:
NotImplementedError("Model type not defined")
# load additional models, if ensemble learning is enabled
if MULTIMODEL:
if MODEL2 == "bert":
print("Start instances for a bert model")
dataset2 = lf.DatasetBert(home_path)
model2 = mo.BertModel(home_path, printStep=-1)
if MODEL3 == "bert":
print("Start instances for a bert model")
dataset3 = lf.DatasetBert(home_path)
model3 = mo.BertModel(home_path, printStep=-1)
combo_model = mo.ComboModel(home_path, printStep=-1,
models = [model, model2, model3],
ModelNames = [NAME, NAME2, NAME3],
probaModes = [PROBA[0], PROBA2[0], PROBA3[0]]
)
# load a dataset
# dataset.load_data()
dataset.load_data(dir="CH_part", end="", testset=1)
# train a tokenizer from scratch, not supported anymore
if SCRATCH:
NotImplementedError("Train a tokenizer")
SCRATCH = "loc of tok"
else:
SCRATCH = None
# train the model
if TRAIN:
model.train(nbEpochs=EPOCHS,
outModelName=NAME,
startCheckpoint=CHECKPOINT,
dataEnd="",
tokenizerLocaction=SCRATCH)
# load the model
model.load_model(NAME)
if MULTIMODEL:
# load all the models needed for ensemble learning
combo_model.load_model("***")
all_scores = []
def retrain(model, dataset, NAME, CHECKPOINT, EPOCHS, nb_used=100, end="", onlySave=True, console="",
dir2=None, testset=None, dataset_bert=None):
"""
Finetune a given model
:param model: Model, from the parent class Model
:param dataset: Dataset, from the parent class Dataset
:param NAME: The filename of the saved dataset containing sure guesses and unused samples
:param CHECKPOINT: folder from where to load the model/dataset
:param EPOCHS: number of training epochs
:param nb_used: number of used samples for training/evaluation
:param end: file name extension
:param onlySave: only save the results, but don't retrain the model
:param console: string where the console output is saved
:param dir2: directory containing a dataset from bert
:param testset: integer between 1 and 4, specifying which one of the test datasets is used
:param dataset_bert: dataset used for the bert models
:return:
model: finetuned model,
dataset: the new used testset,
NAME: new name of the model
CHECKPOINT: new directory for the saved files
"""
xsure, ysure = model.getSureGuesses()
xtest, ytest = dataset.get_test()
# replace everything with the wrong answer, to check whether finetuning still improves performance
if False:
for i, _ in enumerate(ysure):
ysure[i] = "<unk>"
# check if any examples have been found above the limit confidence score
# otherwise finetuning can not be performed
if len(xsure) == 0:
print("{}\n{}\n{}\n{}\n{}".format("="*50, "="*50, "there are no sure guesses...", "="*50, "="*50))
return model, dataset, NAME, CHECKPOINT
# increase the length of the finetuning set by duplicating it
# if the finetuning dataset is too small, the network is unable to finetune and will produce an error message
while len(xsure) < 100:
xsure = xsure + xsure
ysure = ysure + ysure
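# e.g. 30 sure guesses are duplicated to 60 and then to 120 examples, stopping once the count is >= 100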
# save the remaining part of the dataset that is untouched and the sure guesses
dataset.save_data(xsure,
ysure,
NAME,
x_test=xtest[nb_used:],
y_test=ytest[nb_used:],
console=console,
)
# finetune the model
if not onlySave:
CHECKPOINT = NAME
# change the name of the new model
NAME += "_adapt_" + end
# load the dataset
dataset.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
# load the dataset for the bert model (only used in ensemble learning)
dataset_bert.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
# attach the file ending if there is any
if end != "":
end = "_" + end
# finetune the model
model.train(nbEpochs=EPOCHS,
outModelName=NAME,
startCheckpoint=CHECKPOINT,
tokenizerLocaction=CHECKPOINT,
dataEnd="",
)
# load the new model
model.load_model(NAME)
return model, dataset, NAME, CHECKPOINT
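# Illustrative call (a sketch only; it mirrors how retrain() is invoked from generate() below,
# the argument values here are examples):
# model, dataset, NAME_r, CHECKPOINT = retrain(model, dataset, NAME_r, CHECKPOINT, EPOCHS,
#                                              nb_used=1000, end="", onlySave=False,
#                                              console=console, dir2="CH", testset=2)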
def K_alpha(tp, tn, fp, fn, alpha):
"""
Calculate the W_alpha score (it is 1 iff all examples are true positives)
:param tp: list, amount of true positive examples for each threshold limit
:param tn: list, amount of true negative examples for each threshold limit
:param fp: list, amount of false positive examples for each threshold limit
:param fn: list, amount of false negative examples for each threshold limit
:param alpha: punishment factor for false positive examples
:return:
K: list of W_alpha scores (one for each threshold limit)
"""
K = []
for i, _ in enumerate(tp):
K.append(tp[i]/(tp[i]+fp[i]*alpha+tn[i]+fn[i]))
return K
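# K_alpha computes W_alpha = tp / (tp + alpha*fp + tn + fn) for every threshold bucket.
# Worked example (illustrative numbers): tp=[80], tn=[5], fp=[10], fn=[5]
#   alpha=1  -> 80 / (80 + 10 + 5 + 5)  = 0.80
#   alpha=10 -> 80 / (80 + 100 + 5 + 5) ~ 0.42
# i.e. a larger alpha punishes false positives more strongly.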
def generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end = "", nb_used=1000,
dir2=None, testset=None, dataset_bert = None):
"""
Extract answers from the test set and summarize the results in diagrams
:param model: the used model
:param dataset: the used dataset
:param NAME: name of the model
:param RETRAIN: number of finetuning iterations
:param CHECKPOINT: directory from where to load the model/dataset
:param EPOCHS: number of epochs used during finetuning
:param all_scores: list containing a summary of results from previous generations
:param end: file extension
:param nb_used: number of samples used from the test set
(usually not chosen above 1000, otherwise the execution takes a long time)
:param dir2: directory for the bert model, only used in ensemble learning
:param testset: integer usually between 1 and 4, specifying which test set is used (there are multiple)
:param dataset_bert: dataset of bert, only used in ensemble learning
:return:
all_scores: list containing a summary of results, updated with current results
"""
# color used for different label categories in the plots
legend_color = {
"all": "black",
"sex": "goldenrod",
"height": "red",
"dateOfBirth": "plum",
"dateOfExpiry": "fuchsia",
"dateOfIssue": "deeppink",
"placeOfBirth": "darkgreen",
"surname": "lawngreen",
"givenName": "mediumseagreen",
"placeOfOrigin": "darkslategrey",
"identityCard": "darkred",
"nationality": "orange",
"eyeColor": "blue",
"fake_height": "black",
}
# markers used for different label categories in the plots
legend_marker = {
"all": "o",
"sex": "v",
"height": "^",
"dateOfBirth": "<",
"dateOfExpiry": ">",
"dateOfIssue": "*",
"placeOfBirth": "x",
"surname": "D",
"givenName": "o",
"placeOfOrigin": "v",
"identityCard": "^",
"nationality": "<",
"eyeColor": ">",
"fake_height": "*",
}
# legend name used for different label categories in the plots
legend_name = {
"all": "all",
"sex": "sex",
"height": "height",
"dateOfBirth": "date of birth",
"dateOfExpiry": "date of expiry",
"dateOfIssue": "date of issue",
"placeOfBirth": "place of birth",
"surname": "last name",
"givenName": "first name",
"placeOfOrigin": "place of origin",
"identityCard": "card number",
"nationality": "nationality",
"eyeColor": "eye color",
"fake_height": "fake_height",
}
# extract the information using the selected token-selection mechanisms (probability modes)
for m in PROBA:
# load the correct model
model.load_model(NAME)
# update the probability mode
model.set_proba_mode(m)
# save the name of the used model
NAME_r = NAME
# repeat for the specified number of fine-tuning steps
for r in range(RETRAIN + 1):
# extract the information from the specified test set
# set to False if only the plots should be generated from previously saved extractions
if True:
# extract the answers
scores_dict, scores, console = model.generate(dataset.get_test(), 0, nb_used, data_bert = dataset_bert.get_test())
# update list of summarized results
all_scores.append(scores)
# save detailed results, such that figures can be created/modified at a later point
os.chdir(home_path)
with open("scores_dict_{}_{}_{}".format(r, dir2, MODEL) + ".p", "wb") as f:
pickle.dump([scores_dict, console], f)
# just generate figures
else:
# load results from previous extractions
os.chdir(home_path)
with open("scores_dict_{}_{}_{}".format(r, dir2, MODEL) + ".p", 'rb') as fp:
preds = pickle.load(fp)
scores_dict = preds[0]
console = preds[1]
# print all scores
# show the number of true positives, false positives, true negatives and false negatives
keys = scores_dict.keys()
buckets = len(scores_dict["all"]["count"])
lim = [x/buckets for x in range(buckets)]
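# e.g. with buckets == 10 this yields lim = [0.0, 0.1, ..., 0.9], one threshold per bucket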
for key in keys:
# old plots: results presented as line plots
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["tp"], 'g+-', label="tp")
plt.plot(lim, scores_dict[key]["tn"], 'go--', label="tn")
plt.plot(lim, scores_dict[key]["fp"], 'r+-', label="fp")
plt.plot(lim, scores_dict[key]["fn"], 'ro--', label="fn")
tmp = [120, 500, 1200, 2000, 5000]
for upper in tmp:
if scores_dict[key]["tp"][0]+scores_dict[key]["fp"][0] < upper:
break
plt.ylim([0, upper])
plt.xlabel("network confidence score")
plt.ylabel("# examples")
plt.legend()
os.chdir(home_path)
plt.savefig("f_tp_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_tp_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# present results as a barplot
else:
# set the numbers in the bars manually as text labels
# (allows ignoring 0 values, and using a small font for small values)
manualNumberLabels = True
def putNumber(X, Y, bottom, ax, upper, pos=0, color="black"):
"""
Put a text label in the plot with the numeric value of the plotted bar
:param X: list of the x values
:param Y: list of the y values
:param bottom: lower end of the plotted bar (list)
:param ax: the figure that should be edited
:param upper: upper limit of the y axis
:param pos: how much the number should be shifted to the left/right
:param color: color of the font
:return:
ax: return the updated figure
"""
# get spacing of the bars on the x-axis
b = X[1]-X[0]
# set offset to center based on the y scale
if upper > 1000:
off = [15, 10]
else:
off = [2, 1]
# set a numeric label for all bars
for i, x in enumerate(X):
# if the value is larger than 3% of the maximum representable value, use the standard format
# (assuming the bar is big enough to contain the entire label)
if Y[i] > 0.03*upper:
ax.text(x-b/4, Y[i]/2+bottom[i]-off[0], str(int(Y[i])), color="black", fontsize=10, bbox={'edgecolor': "white", 'facecolor': 'white', 'alpha': 0, 'pad': 2})
# if the value is greater than 0 but the bar is small, use a smaller font and
# allow small horizontal displacements to avoid overlap
elif Y[i] > 0:
ax.text(x-b/4+b/4*pos, Y[i] / 2 + bottom[i]-off[1], str(int(Y[i])), color="black", fontsize=6,
bbox={'edgecolor': "white", 'facecolor': 'white', 'alpha': 0, 'pad': 2})
return ax
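# putNumber is applied below to each stacked layer (tp, tn, fn, fp), passing the running
# "bottom" so that every label is centered inside its own bar segment.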
width = 0.08
fig, ax = plt.subplots()
#115, 150
tmp = [150, 500, 1150, 2000, 5000]
for upper in tmp:
if scores_dict[key]["tp"][0] + scores_dict[key]["fp"][0] < upper:
break
plt.ylim([0, upper])
tmp = copy.deepcopy(scores_dict[key]["tp"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p1 = ax.bar(lim, tmp , width, label='tp', color="lime", hatch='/', edgecolor="limegreen")
if manualNumberLabels:
ax = putNumber(lim, tmp, [0 for _ in lim], ax, upper, pos = 1.5, color="darkslategray")
bottom = copy.deepcopy(tmp)
tmp = copy.deepcopy(scores_dict[key]["tn"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p2 = ax.bar(lim, tmp, width,
bottom=bottom, label='tn', color="orange", hatch='-', edgecolor="darkorange")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos = -0.25, color="saddlebrown")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
tmp = copy.deepcopy(scores_dict[key]["fn"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p3 = ax.bar(lim, tmp, width,
bottom=bottom, label='fn', color="darkgreen", hatch='x', edgecolor="green")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos=0.625, color="darkolivegreen")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
tmp = copy.deepcopy(scores_dict[key]["fp"])
for t, _ in enumerate(tmp):
tmp[t] = int(tmp[t])
p4 = ax.bar(lim, tmp, width,
bottom=bottom, label='fp', color="red", hatch='.', edgecolor="tomato")
if manualNumberLabels:
ax = putNumber(lim, tmp, bottom, ax, upper, pos = 1.5, color="maroon")
for i, _ in enumerate(bottom):
bottom[i] += tmp[i]
# ax.axhline(0, color='grey', linewidth=0.8)
ax.set_ylabel('# ID cards per category')
ax.set_xlabel('threshold for confidence score $c$')
ax.set_title(legend_name[key])
ax.set_xticks(lim)
lim_str = []
for l in lim:
lim_str.append(str(l))
ax.set_xticklabels(lim_str)
ax.legend()
# Label with label_type 'center' instead of the default 'edge'
for p in [p1, p2, p3, p4]:
if not manualNumberLabels:
ax.bar_label(p, label_type='center')
# ax.bar_label(p2)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=4, fancybox=False, shadow=False)
#plt.legend(loc='center right')
os.chdir(home_path)
plt.savefig("f_tp_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_tp_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ---------------
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["f1"], 'go-', label="F1")
plt.plot(lim, scores_dict[key]["recall"], 'bx:', label="recall")
plt.plot(lim, scores_dict[key]["precision"], 'md--', label="precision")
plt.ylim([0,1])
plt.xlabel("network confidence score")
plt.ylabel("score")
plt.legend()
os.chdir(home_path)
plt.savefig("f_sc_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_sc_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ----------------
if False:
plt.title(legend_name[key])
plt.plot(lim, scores_dict[key]["count"], "ko-")
tmp = [150, 500, 1000, 2000, 5000]
for upper in tmp:
if scores_dict[key]["count"][0] < upper:
break
plt.ylim([0, upper])
plt.xlabel("network confidence score")
plt.ylabel("# examples")
os.chdir(home_path)
plt.savefig("f_conf_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_conf_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ----------------
if False:
plt.title(legend_name[key])
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
1),
'co-', label="$\\alpha=1$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
2),
'cx:', label="$\\alpha=2$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
10),
'cd--', label="$\\alpha=10$")
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
100),
'c*', label = "$\\alpha=100$")
plt.ylim([0, 1])
plt.xlabel("network confidence score")
plt.ylabel("$W_{\\alpha}$")
plt.legend()
os.chdir(home_path)
plt.savefig("f_k_{}_{}.eps".format(key, r), format="eps")
plt.savefig("f_k_{}_{}.jpg".format(key, r), format="jpg")
plt.close()
# ---------------------------------------------
plt.title("summary for all keys")
for key in keys:
try:
c = legend_color[key]
n = legend_name[key]
m = legend_marker[key]
except KeyError:
c = "yellow"
n = "unk"
m = "x"
plt.plot(lim, scores_dict[key]["f1"], label=n, color=c, marker=m)
plt.xlabel("threshold for confidence score $c$")
plt.ylabel("f1 score")
plt.ylim([-0.05,1.4])
# plt.legend()
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=3, fancybox=False, shadow=False)
os.chdir(home_path)
plt.savefig("f_f1_{}.eps".format(r), format="eps")
plt.savefig("f_f1_{}.jpg".format(r), format="jpg")
plt.close()
# ---------------------------------------------
for alpha in [1, 2, 10, 100]:
plt.title("summary for all keys")
for key in keys:
try:
c = legend_color[key]
n = legend_name[key]
m = legend_marker[key]
except KeyError:
c = "yellow"
n = "unk"
m = "x"
plt.plot(lim, K_alpha(scores_dict[key]["tp"],
scores_dict[key]["tn"],
scores_dict[key]["fp"],
scores_dict[key]["fn"],
alpha), label=n, color=c, marker=m, linestyle="--")
plt.xlabel("threshold for confidence score $c$")
plt.ylabel("$W_{\\alpha}$ score ($\\alpha=$" + str(alpha) + ")")
plt.ylim([-0.05, 1.4])
# plt.legend()
plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0),
ncol=3, fancybox=False, shadow=False)
os.chdir(home_path)
plt.savefig("f_k_{}_{}.eps".format(alpha, r), format="eps")
plt.savefig("f_k_{}_{}.jpg".format(alpha, r), format="jpg")
plt.close()
with open("all_scores" + ".json", "w") as f:
json.dump(all_scores, f)
# don't do it the last time (saves time, and there is no use in training once more)
if r < RETRAIN:
onlySave = False
else:
onlySave = True
testset += 1
model, dataset, NAME_r, CHECKPOINT = retrain(model,
dataset,
NAME_r,
CHECKPOINT,
EPOCHS,
end=end,
onlySave=onlySave,
console=console,
nb_used=nb_used,
dir2=dir2,
testset=testset,
dataset_bert=dataset_bert)
return all_scores
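# The blocks below are manual experiment toggles: only the blocks set to True are executed,
# each one loading a different dataset directory and passing it through generate().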
if False:
print("{} Test on part CH dataset (test) {}".format("="*100, "="*100))
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)
if False:
print("{} Test on new CH mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH",
testset=1)
if False:
print("{} Test on new CH1 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH1", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH1",
testset=1)
if False:
print("{} Test on new CH10 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH10", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH10",
testset=1)
if False:
print("{} Test on new CH50 mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH50", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
testset=1)
if False:
print("{} Test on new FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="FI", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
testset=1)
if False:
print("{} Test on new FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DE", end="", testset=1)
dataset2.load_data(dir="DE", end="", testset=1)
all_scores = generate(combo_model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DE",
testset=1, dataset_bert=dataset2)
if True:
print("{} Test on new DE mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="FI", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
testset=1, dataset_bert=dataset)
if False:
print("{} Test on new DEL label mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DEL", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEL",
testset=1)
if False:
print("{} Test on new DEN number mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DEN", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEN",
testset=1)
if False:
print("{} Test on new DED dcoulbe mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="DED", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DED",
testset=1)
if False:
print("{} Test on new FR mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="CH50", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
testset=1)
if False:
print("{} Test on new IT mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="IT", end="", testset=1)
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="IT",
testset=1)
if False:
print("{} Test on FR mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fr_mixed_full", end="_fr")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")
if False:
print("{} Test on FI mixed dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fi_mixed_full", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")
if False:
print("{} Test on CH dataset {}".format("="*100, "="*100))
dataset.load_data(dir="ch_full")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)
if False:
print("{} Test on FR dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fr_full_surname", end="_fr")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")
if False:
print("{} Test on FI dataset {}".format("="*100, "="*100))
dataset.load_data(dir="fi_full_surname", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")
if False:
print("{} Test on FI dataset with Finnish names {}".format("="*100, "="*100))
dataset.load_data(dir="fi_full_surname_fi_names", end="_fi")
all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi2")
print("{}\nSummary\n{}".format("="*100, "="*100))
for item in all_scores:
print(item)
