Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F87039349
main.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Oct 10, 04:31
Size
30 KB
Mime Type
text/x-python
Expires
Sat, Oct 12, 04:31 (1 d, 22 h)
Engine
blob
Format
Raw Data
Handle
21526687
Attached To
R11484 ADDI
main.py
View Options
# score per question category
# entropy per question (if it's always the same, don't accept it? Problem with e.g. nationality)
import
os
import
load_files
as
lf
import
model
as
mo
import
json
import
copy
import
pickle
import
matplotlib.pyplot
as
plt
# Embed fonts as TrueType (type 42) in the exported PDF/PS figures.
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Switch for the ensemble (ComboModel) setup built from MODEL, MODEL2 and MODEL3.
MULTIMODEL = False

# Model family of each (ensemble) member: "gpt" or "bert".
MODEL = "gpt"
MODEL2 = "bert"
MODEL3 = "bert"

# Train from scratch (True) or start from a checkpoint (False); from scratch is not supported anymore.
SCRATCH = False

# Start checkpoints, e.g. xlm-roberta-base, gpt2, None, roberta-base, "gpt2-medium", "gpt2-large", "gpt2-xl".
CHECKPOINT = "gpt2"
CHECKPOINT2 = "roberta-base"
CHECKPOINT3 = "xlm-roberta-base"

# Whether the model is trained in this run (True, False) and for how many epochs.
TRAIN = False
EPOCHS = 1

# Names of the stored models,
# e.g. "gpt_e1_CHpart", "xlm_e1_CHpart", "roberta_e1_CHpart", xmlr_e_10_test, gpt_e_1_test.
# NOTE(review): NAME refers to an xlm model although MODEL is "gpt" - confirm this combination is intended.
NAME = "xlm_e1_CHpart"
NAME2 = "roberta_e1_CHpart"
NAME3 = "xlm_e1_CHpart"

# Probability modes (token selection mechanisms) that are evaluated.
# Further options: "longOk", ["mult", "forceNon0", "maxNon0"], ["mult", "longOk"].
PROBA = ["forceNon0"]
PROBA2 = ["forceNon0"]
PROBA3 = ["forceNon0"]

# How many times the model is fine-tuned on its own sure guesses (0 is never).
RETRAIN = 3

# Directory the script was started from; all result files are written relative to it.
home_path = os.getcwd()
# Create the dataset wrapper and the model that belong to the configured MODEL type.
if MODEL == "gpt":
    print("Start instances for a gpt2 model")
    dataset = lf.DatasetGPT(home_path)
    model = mo.GPTModel(home_path, printStep=-1)
elif MODEL == "bert":
    print("Start instances for a bert model")
    dataset = lf.DatasetBert(home_path)
    model = mo.BertModel(home_path, printStep=-1)
else:
    # Fail right here; without the raise the script only died later with a
    # NameError on the undefined `dataset`/`model`.
    raise NotImplementedError("Model type not defined")
# Load the additional models, if ensemble learning is enabled.
if MULTIMODEL:
    # Only bert is available for the second and third ensemble member; any other
    # value would leave model2/model3 undefined, so it is rejected explicitly.
    if MODEL2 == "bert":
        print("Start instances for a bert model")
        dataset2 = lf.DatasetBert(home_path)
        model2 = mo.BertModel(home_path, printStep=-1)
    else:
        raise NotImplementedError("Model type not defined for MODEL2")
    if MODEL3 == "bert":
        print("Start instances for a bert model")
        dataset3 = lf.DatasetBert(home_path)
        model3 = mo.BertModel(home_path, printStep=-1)
    else:
        raise NotImplementedError("Model type not defined for MODEL3")
    # Combine the three members; each one uses the first entry of its probability mode list.
    combo_model = mo.ComboModel(
        home_path,
        printStep=-1,
        models=[model, model2, model3],
        ModelNames=[NAME, NAME2, NAME3],
        probaModes=[PROBA[0], PROBA2[0], PROBA3[0]],
    )
# Load the default dataset.
# dataset.load_data()
dataset.load_data(dir="CH_part", end="", testset=1)

# Training a tokenizer from scratch is not supported anymore. The exception used
# to be created without being raised, which silently continued with a placeholder
# tokenizer location.
if SCRATCH:
    raise NotImplementedError("Train a tokenizer")
# From here on SCRATCH holds the tokenizer location handed to model.train (None: default).
SCRATCH = None

# Train the model, if requested.
if TRAIN:
    model.train(
        nbEpochs=EPOCHS,
        outModelName=NAME,
        startCheckpoint=CHECKPOINT,
        dataEnd="",
        tokenizerLocaction=SCRATCH,
    )

# Load the (trained) model.
model.load_model(NAME)
if MULTIMODEL:
    # load all the models needed for ensemble learning
    combo_model.load_model("***")

# Summary of the results of every evaluation round, filled by generate().
all_scores = []
def retrain(model, dataset, NAME, CHECKPOINT, EPOCHS, nb_used=100, end="", onlySave=True, console="", dir2=None,
            testset=None, dataset_bert=None):
    """
    Save the sure guesses of a model and, optionally, finetune the model on them
    :param model: Model, from the parent class Model
    :param dataset: Dataset, from the parent class Dataset
    :param NAME: name of the current model; also the name under which the sure guesses and the unused
        samples are saved
    :param CHECKPOINT: folder from where to load the model/dataset
    :param EPOCHS: number of training epochs
    :param nb_used: number of test samples already used; the samples from this index on are saved as unused
    :param end: file name extension, appended to the name of the finetuned model
    :param onlySave: only save the results, but don't retrain the model
    :param console: string where the console output is saved
    :param dir2: directory containing a dataset from bert
    :param testset: integer between 1 and 4, specifying which one of the test datasets is used
    :param dataset_bert: dataset used for the bert models (only used in ensemble learning, may be None)
    :return:
    model: finetuned model,
    dataset: the new used testset,
    NAME: new name of the model
    CHECKPOINT: new directory for the saved files
    """
    xsure, ysure = model.getSureGuesses()
    xtest, ytest = dataset.get_test()

    # Debug switch: replace every answer with a wrong one, to check whether
    # finetuning still improves the performance.
    replace_with_wrong_answers = False
    if replace_with_wrong_answers:
        for i, _ in enumerate(ysure):
            ysure[i] = "<unk>"

    # Without any example above the confidence limit, finetuning can not be performed.
    if len(xsure) == 0:
        print("{}\n{}\n{}\n{}\n{}".format("=" * 50, "=" * 50, "there are no sure guesses...", "=" * 50, "=" * 50))
        return model, dataset, NAME, CHECKPOINT

    # Increase the length of the finetuning set by duplicating it: if it is too
    # small, the network is unable to finetune and produces an error message.
    while len(xsure) < 100:
        xsure = xsure + xsure
        ysure = ysure + ysure

    # Save the sure guesses and the part of the test set that is still untouched.
    dataset.save_data(xsure, ysure, NAME, x_test=xtest[nb_used:], y_test=ytest[nb_used:], console=console)

    # Finetune the model.
    if not onlySave:
        CHECKPOINT = NAME
        # change the name of the new model
        NAME += "_adapt_" + end
        # load the dataset that was just saved
        dataset.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
        # The bert dataset only exists in ensemble learning; skip it when it was not
        # passed instead of failing on None.
        if dataset_bert is not None:
            dataset_bert.load_data(dir=CHECKPOINT, end="", dir2=dir2, testset=testset)
        model.train(
            nbEpochs=EPOCHS,
            outModelName=NAME,
            startCheckpoint=CHECKPOINT,
            tokenizerLocaction=CHECKPOINT,
            dataEnd="",
        )
        # load the new model
        model.load_model(NAME)
    return model, dataset, NAME, CHECKPOINT
def K_alpha(tp, tn, fp, fn, alpha):
    """
    Calculate the W_alpha score tp / (tp + alpha * fp + tn + fn) for each threshold limit
    (the score is 1 iff all examples are true positive)
    :param tp: list, amount of true positive examples for each threshold limit
    :param tn: list, amount of true negative examples for each threshold limit
    :param fp: list, amount of false positive examples for each threshold limit
    :param fn: list, amount of false negative examples for each threshold limit
    :param alpha: punishment factor for false positive examples
    :return:
    K: list of W_alpha scores (one for each threshold limit); 0.0 where a threshold has no examples at all
    """
    K = []
    for i, true_pos in enumerate(tp):
        denominator = true_pos + fp[i] * alpha + tn[i] + fn[i]
        # A threshold without any example has no defined ratio; report 0.0
        # instead of failing with a division by zero.
        K.append(true_pos / denominator if denominator else 0.0)
    return K
def _put_number(X, Y, bottom, ax, upper, pos=0, color="black"):
    """
    Put a text label with the numeric value of every plotted bar segment into the plot
    :param X: list of the x values
    :param Y: list of the y values (heights of the segments)
    :param bottom: lower end of the plotted bar (list)
    :param ax: the axes that should be edited
    :param upper: upper limit of the y axis
    :param pos: how much the number of a small segment should be shifted to the left/right
    :param color: kept for call compatibility; the labels are always drawn in black
    :return:
    ax: return the updated axes
    """
    # spacing of the bars on the x-axis (a single bar needs no horizontal shift)
    spacing = X[1] - X[0] if len(X) > 1 else 0
    # vertical offset used to center the label, depending on the y scale
    if upper > 1000:
        off = [15, 10]
    else:
        off = [2, 1]
    box = {'edgecolor': "white", 'facecolor': 'white', 'alpha': 0, 'pad': 2}
    for i, x in enumerate(X):
        if Y[i] > 0.03 * upper:
            # The value is bigger than 3% of the axis range: assume the bar is
            # big enough to contain the entire label in the standard format.
            ax.text(x - spacing / 4, Y[i] / 2 + bottom[i] - off[0], str(int(Y[i])), color="black", fontsize=10,
                    bbox=dict(box))
        elif Y[i] > 0:
            # Small but non-zero segment: use a smaller font and allow a small
            # horizontal displacement to avoid overlapping labels.
            ax.text(x - spacing / 4 + spacing / 4 * pos, Y[i] / 2 + bottom[i] - off[1], str(int(Y[i])),
                    color="black", fontsize=6, bbox=dict(box))
    return ax


def _axis_upper(total, candidates):
    """Return the first candidate that is bigger than total, or the last candidate if none is."""
    for upper in candidates:
        if total < upper:
            break
    return upper


def _save_figure(stem):
    """Save the current figure as <stem>.eps and <stem>.jpg in home_path and close it."""
    os.chdir(home_path)
    plt.savefig(stem + ".eps", format="eps")
    plt.savefig(stem + ".jpg", format="jpg")
    plt.close()


def _plot_stacked_counts(scores, lim, title, upper, manual_number_labels):
    """Draw the tp/tn/fn/fp counts of one category as stacked bars over the thresholds in lim."""
    width = 0.08
    fig, ax = plt.subplots()
    plt.ylim([0, upper])
    # (score name, bar color, hatch, edge color, label shift, label color), listed from bottom to top
    segments = [
        ("tp", "lime", '/', "limegreen", 1.5, "darkslategray"),
        ("tn", "orange", '-', "darkorange", -0.25, "saddlebrown"),
        ("fn", "darkgreen", 'x', "green", 0.625, "darkolivegreen"),
        ("fp", "red", '.', "tomato", 1.5, "maroon"),
    ]
    bottom = [0 for _ in lim]
    bars = []
    for name, face, hatch, edge, shift, font in segments:
        heights = [int(value) for value in scores[name]]
        bars.append(ax.bar(lim, heights, width, bottom=list(bottom), label=name, color=face, hatch=hatch,
                           edgecolor=edge))
        if manual_number_labels:
            ax = _put_number(lim, heights, bottom, ax, upper, pos=shift, color=font)
        # the next segment is stacked on top of this one
        bottom = [low + height for low, height in zip(bottom, heights)]
    ax.set_ylabel('# ID cards per category')
    ax.set_xlabel('threshold for confidence score $c$')
    ax.set_title(title)
    ax.set_xticks(lim)
    ax.set_xticklabels([str(limit) for limit in lim])
    if not manual_number_labels:
        # label with label_type 'center' instead of the default 'edge'
        for bar in bars:
            ax.bar_label(bar, label_type='center')
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=4, fancybox=False, shadow=False)


def generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", nb_used=1000, dir2=None,
             testset=None, dataset_bert=None):
    """
    Extract answers from the test set and summarize the results in diagrams
    :param model: the used model
    :param dataset: the used dataset
    :param NAME: name of the model
    :param RETRAIN: number of finetuning rounds
    :param CHECKPOINT: directory from where to load the model/dataset
    :param EPOCHS: number of epochs used during finetuning
    :param all_scores: list containing a summary of results from previous generations
    :param end: file extension
    :param nb_used: number of samples used from the test set
        (usually not chosen above 1000, otherwise the execution takes a lot of time)
    :param dir2: directory for the bert model, only used in ensemble learning
    :param testset: integer usually between 1 and 4, specifying which test set is used (there are multiple);
        it is increased by one after every round, None is passed on unchanged
    :param dataset_bert: dataset of bert, only used in ensemble learning (may be None)
    :return:
    all_scores: list containing a summary of results, updated with current results
    """
    # Switches of this function; edit them here to change what is produced.
    extract_answers = True  # False: only rebuild the plots from previously saved extractions
    plot_counts_as_lines = False  # old plots: counts as line plot instead of stacked bars
    plot_scores_per_key = False  # F1/recall/precision per category
    plot_confidence_per_key = False  # number of examples per confidence score and category
    plot_w_alpha_per_key = False  # W_alpha scores per category
    # set the numbers in the bars manually as text label
    # (allows to ignore 0 values, and have small font for small values)
    manual_number_labels = True

    # color used for different label categories in the plots
    legend_color = {
        "all": "black",
        "sex": "goldenrod",
        "height": "red",
        "dateOfBirth": "plum",
        "dateOfExpiry": "fuchsia",
        "dateOfIssue": "deeppink",
        "placeOfBirth": "darkgreen",
        "surname": "lawngreen",
        "givenName": "mediumseagreen",
        "placeOfOrigin": "darkslategrey",
        "identityCard": "darkred",
        "nationality": "orange",
        "eyeColor": "blue",
        "fake_height": "black",
    }
    # markers used for different label categories in the plots
    legend_marker = {
        "all": "o",
        "sex": "v",
        "height": "^",
        "dateOfBirth": "<",
        "dateOfExpiry": ">",
        "dateOfIssue": "*",
        "placeOfBirth": "x",
        "surname": "D",
        "givenName": "o",
        "placeOfOrigin": "v",
        "identityCard": "^",
        "nationality": "<",
        "eyeColor": ">",
        "fake_height": "*",
    }
    # legend name used for different label categories in the plots
    legend_name = {
        "all": "all",
        "sex": "sex",
        "height": "height",
        "dateOfBirth": "date of birth",
        "dateOfExpiry": "date of expiry",
        "dateOfIssue": "date of issue",
        "placeOfBirth": "place of birth",
        "surname": "last name",
        "givenName": "first name",
        "placeOfOrigin": "place of origin",
        "identityCard": "card number",
        "nationality": "nationality",
        "eyeColor": "eye color",
        "fake_height": "fake_height",
    }

    # extract the information using the different selected token selection mechanisms (probability modes)
    for proba_mode in PROBA:
        # load the correct model and update its probability mode
        model.load_model(NAME)
        model.set_proba_mode(proba_mode)
        # name of the model used in the current round
        NAME_r = NAME
        # one evaluation round per finetuning step, plus the initial one
        for r in range(RETRAIN + 1):
            scores_file = "scores_dict_{}_{}_{}".format(r, dir2, MODEL) + ".p"
            if extract_answers:
                # The bert data only exists in ensemble learning.
                # NOTE(review): assumes model.generate accepts data_bert=None for the
                # models that do not use it - confirm against the model classes.
                data_bert = dataset_bert.get_test() if dataset_bert is not None else None
                scores_dict, scores, console = model.generate(dataset.get_test(), 0, nb_used, data_bert=data_bert)
                # update list of summarized results
                all_scores.append(scores)
                # save detailed results, such that figures can be created/modified at a later point
                os.chdir(home_path)
                with open(scores_file, "wb") as f:
                    pickle.dump([scores_dict, console], f)
            else:
                # Load results from previous extractions. The pickle is only meant to
                # be a file written by this script; never load one from an untrusted source.
                os.chdir(home_path)
                with open(scores_file, 'rb') as f:
                    preds = pickle.load(f)
                scores_dict = preds[0]
                console = preds[1]

            keys = scores_dict.keys()
            # one bucket per confidence threshold, thresholds are spread evenly over [0, 1)
            buckets = len(scores_dict["all"]["count"])
            lim = [x / buckets for x in range(buckets)]

            # plot style (color, legend name, marker) per category, with a fallback for unknown ones
            styles = {}
            for key in keys:
                try:
                    styles[key] = (legend_color[key], legend_name[key], legend_marker[key])
                except KeyError:
                    styles[key] = ("yellow", "unk", "x")

            for key in keys:
                scores_key = scores_dict[key]
                title = styles[key][1]
                # amount of true positive, false positive, true negative and false negative
                if plot_counts_as_lines:
                    plt.title(title)
                    plt.plot(lim, scores_key["tp"], 'g+-', label="tp")
                    plt.plot(lim, scores_key["tn"], 'go--', label="tn")
                    plt.plot(lim, scores_key["fp"], 'r+-', label="fp")
                    plt.plot(lim, scores_key["fn"], 'ro--', label="fn")
                    upper = _axis_upper(scores_key["tp"][0] + scores_key["fp"][0], [120, 500, 1200, 2000, 5000])
                    plt.ylim([0, upper])
                    plt.xlabel("network confidence score")
                    plt.ylabel("# examples")
                    plt.legend()
                else:
                    upper = _axis_upper(scores_key["tp"][0] + scores_key["fp"][0], [150, 500, 1150, 2000, 5000])
                    _plot_stacked_counts(scores_key, lim, title, upper, manual_number_labels)
                _save_figure("f_tp_{}_{}".format(key, r))

                if plot_scores_per_key:
                    plt.title(title)
                    plt.plot(lim, scores_key["f1"], 'go-', label="F1")
                    plt.plot(lim, scores_key["recall"], 'bx:', label="recall")
                    plt.plot(lim, scores_key["precision"], 'md--', label="precision")
                    plt.ylim([0, 1])
                    plt.xlabel("network confidence score")
                    plt.ylabel("score")
                    plt.legend()
                    _save_figure("f_sc_{}_{}".format(key, r))

                if plot_confidence_per_key:
                    plt.title(title)
                    plt.plot(lim, scores_key["count"], "ko-")
                    upper = _axis_upper(scores_key["count"][0], [150, 500, 1000, 2000, 5000])
                    plt.ylim([0, upper])
                    plt.xlabel("network confidence score")
                    plt.ylabel("# examples")
                    _save_figure("f_conf_{}_{}".format(key, r))

                if plot_w_alpha_per_key:
                    plt.title(title)
                    for alpha, line_format in [(1, 'co-'), (2, 'cx:'), (10, 'cd--'), (100, 'c*')]:
                        plt.plot(lim,
                                 K_alpha(scores_key["tp"], scores_key["tn"], scores_key["fp"], scores_key["fn"],
                                         alpha),
                                 line_format, label="$\\alpha={}$".format(alpha))
                    plt.ylim([0, 1])
                    plt.xlabel("network confidence score")
                    plt.ylabel("$W_{\\alpha}$")
                    plt.legend()
                    _save_figure("f_k_{}_{}".format(key, r))

            # F1 score of all categories in one figure
            plt.title("summary for all keys")
            for key in keys:
                color, label, marker = styles[key]
                plt.plot(lim, scores_dict[key]["f1"], label=label, color=color, marker=marker)
            plt.xlabel("threshold for confidence score $c$")
            plt.ylabel("f1 score")
            # head room above 1 keeps the legend from covering the curves
            plt.ylim([-0.05, 1.4])
            plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3, fancybox=False, shadow=False)
            _save_figure("f_f1_{}".format(r))

            # W_alpha score of all categories, one figure per punishment factor
            for alpha in [1, 2, 10, 100]:
                plt.title("summary for all keys")
                for key in keys:
                    color, label, marker = styles[key]
                    plt.plot(lim,
                             K_alpha(scores_dict[key]["tp"], scores_dict[key]["tn"], scores_dict[key]["fp"],
                                     scores_dict[key]["fn"], alpha),
                             label=label, color=color, marker=marker, linestyle="--")
                plt.xlabel("threshold for confidence score $c$")
                plt.ylabel("$W_{\\alpha}$ score ($\\alpha=$" + str(alpha) + ")")
                plt.ylim([-0.05, 1.4])
                plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.0), ncol=3, fancybox=False, shadow=False)
                _save_figure("f_k_{}_{}".format(alpha, r))

            # keep the summary on disk up to date after every round
            with open("all_scores" + ".json", "w") as f:
                json.dump(all_scores, f)

            # Don't finetune after the last round (saves time, and there is no use to train once more).
            onlySave = not r < RETRAIN
            # continue with the next test set; without a test set number there is nothing to advance
            if testset is not None:
                testset += 1
            model, dataset, NAME_r, CHECKPOINT = retrain(model, dataset, NAME_r, CHECKPOINT, EPOCHS, end=end,
                                                         onlySave=onlySave, console=console, nb_used=nb_used,
                                                         dir2=dir2, testset=testset, dataset_bert=dataset_bert)
    return all_scores
# Experiments. Every block loads one dataset and evaluates it with generate();
# switch the condition of a block to True to run it.
if False:
    print("{} Test on part CH dataset (test) {}".format("=" * 100, "=" * 100))
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)

if False:
    print("{} Test on new CH mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="CH", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH",
                          testset=1)

if False:
    print("{} Test on new CH1 mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="CH1", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH1",
                          testset=1)

if False:
    print("{} Test on new CH10 mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="CH10", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH10",
                          testset=1)

if False:
    print("{} Test on new CH50 mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="CH50", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
                          testset=1)

if False:
    print("{} Test on new FI mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="FI", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
                          testset=1)

# Ensemble run; needs MULTIMODEL = True, otherwise combo_model and dataset2 do not exist.
if False:
    # the message names the dataset that is actually loaded (it used to say FI)
    print("{} Test on new DE mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="DE", end="", testset=1)
    dataset2.load_data(dir="DE", end="", testset=1)
    all_scores = generate(combo_model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DE",
                          testset=1, dataset_bert=dataset2)

# Currently active experiment.
if True:
    # The message names the dataset that is actually loaded (it used to say DE).
    # NOTE(review): if DE was the intended dataset, change dir/dir2 instead.
    print("{} Test on new FI mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="FI", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="FI",
                          testset=1, dataset_bert=dataset)

if False:
    print("{} Test on new DEL label mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="DEL", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEL",
                          testset=1)

if False:
    print("{} Test on new DEN number mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="DEN", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DEN",
                          testset=1)

if False:
    print("{} Test on new DED dcoulbe mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="DED", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="DED",
                          testset=1)

if False:
    # NOTE(review): announced as FR, but the CH50 dataset is loaded - confirm which one is meant.
    print("{} Test on new FR mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="CH50", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="CH50",
                          testset=1)

if False:
    print("{} Test on new IT mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="IT", end="", testset=1)
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="", dir2="IT",
                          testset=1)

if False:
    print("{} Test on FR mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="fr_mixed_full", end="_fr")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")

if False:
    print("{} Test on FI mixed dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="fi_mixed_full", end="_fi")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")

if False:
    print("{} Test on CH dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="ch_full")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores)

if False:
    print("{} Test on FR dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="fr_full_surname", end="_fr")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fr")

if False:
    print("{} Test on FI dataset {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="fi_full_surname", end="_fi")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi")

if False:
    print("{} Test on FI dataset with Finnish names {}".format("=" * 100, "=" * 100))
    dataset.load_data(dir="fi_full_surname_fi_names", end="_fi")
    all_scores = generate(model, dataset, NAME, RETRAIN, CHECKPOINT, EPOCHS, all_scores, end="fi2")
# Final report: a framed headline followed by one line per collected result.
separator = "=" * 100
print("{}\nSummary\n{}".format(separator, separator))
for round_scores in all_scores:
    print(round_scores)
Event Timeline
Log In to Comment