Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102923269
parse_est_times.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Feb 25, 13:03
Size
2 KB
Mime Type
text/x-python
Expires
Thu, Feb 27, 13:03 (2 d)
Engine
blob
Format
Raw Data
Handle
24375619
Attached To
R3704 elastic-yarn
parse_est_times.py
View Options
#!/usr/bin/env python
import
sys
import
os
from
contextlib
import
nested
# Markers searched for in each log line.
done_token = "WAIT_DONE"
fin_token = "FIN_TIME: "
pred_token = "PREDICTIONS: "
pred_end_token = "GOT_MEM:"

# Output file suffixes, in the order the files are written:
#   dcr  - decision correctness: finish time minus estimate
#   dpcr - same, additionally minus the time at which the prediction was made
#   ddr  - drift between consecutive estimates, adjusted for elapsed time
#   cgt  - per-line count of time-estimate changes
#   cgm  - per-line count of memory-estimate changes
#   cgmd - value of each memory-estimate change
OUTPUT_KEYS = ("dcr", "dpcr", "ddr", "cgt", "cgm", "cgmd")


def _extract_fin_time(line):
    """Return the integer that follows ``fin_token`` in *line*."""
    start = line.find(fin_token) + len(fin_token)
    return int(line[start:].split(" ")[0])


def _extract_predictions(line):
    """Return the ``(crt, est, emem)`` integer triples found in *line*.

    Predictions are the bracketed ``[crt,est,emem]`` groups located between
    ``pred_token`` and ``pred_end_token``. Groups without a comma are skipped.
    """
    start = line.find(pred_token) + len(pred_token)
    end = line.find(pred_end_token)
    triples = []
    for chunk in line[start:end].split("[")[1:]:
        entry = chunk.split("]")[0]
        if "," not in entry:
            # Skip instead of removing from a list that is being iterated,
            # which used to drop the entry following every malformed one.
            continue
        fields = entry.split(",")
        triples.append((int(fields[0]), int(fields[1]), int(fields[2])))
    return triples


def analyze_lines(lines):
    """Compute all statistics for an iterable of log lines.

    Only lines containing ``done_token`` are considered. Returns a dict that
    maps every key in ``OUTPUT_KEYS`` to a list of integers, one list entry
    per line that would be written to the corresponding output file.
    """
    results = dict((key, []) for key in OUTPUT_KEYS)
    for line in lines:
        if done_token not in line:
            continue

        # Harvest the end time.
        fin_time = _extract_fin_time(line)

        prev_crt = 0
        prev_est = 0  # 0 doubles as "no previous estimate yet"
        prev_emem = 0
        est_changes = 0
        emem_changes = 0
        for crt, est, emem in _extract_predictions(line):
            # Correctness: difference between total time and estimation.
            results["dcr"].append(fin_time - est)
            results["dpcr"].append(fin_time - crt - est)

            # Drift: difference to the previous time estimation adjusted
            # with the elapsed time.
            if prev_est != 0:
                results["ddr"].append(est - (prev_est + (crt - prev_crt)))

            # Count the number of changes in the predictions, both time-wise
            # and memory-wise. Predictions made at time 0 are not counted.
            if crt != 0:
                if est != prev_est:
                    est_changes += 1
                if emem != prev_emem:
                    emem_changes += 1
                    # NOTE(review): nesting reconstructed from a view that had
                    # lost indentation; confirm the delta is only recorded
                    # when the memory estimate actually changed.
                    results["cgmd"].append(emem - prev_emem)

            prev_crt, prev_est, prev_emem = crt, est, emem

        results["cgt"].append(est_changes)
        results["cgm"].append(emem_changes)
    return results


def main(argv):
    """Parse the log named in *argv* and write the six statistics files.

    ``argv[1]`` is the input log; the optional ``argv[2]`` is the base name of
    the output files and defaults to the input name without its extension.
    Returns the process exit status (0 on success, 2 on a usage error).
    """
    if len(argv) < 2:
        sys.stderr.write("usage: %s LOGFILE [OUTFILEBASE]\n" % argv[0])
        return 2

    filename = argv[1]
    # argv[2] only exists when at least three items are present.
    if len(argv) > 2:
        outfilebase = argv[2]
    else:
        outfilebase = os.path.splitext(filename)[0]

    with open(filename) as f:
        results = analyze_lines(f)

    for key in OUTPUT_KEYS:
        with open(outfilebase + "." + key, "w") as out:
            out.writelines(str(value) + "\n" for value in results[key])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Event Timeline
Log In to Comment