Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60752081
onepass.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, May 2, 09:33
Size
5 KB
Mime Type
text/x-python
Expires
Sat, May 4, 09:33 (2 d)
Engine
blob
Format
Raw Data
Handle
17406090
Attached To
R11301 MOLEKUEHL
onepass.py
View Options
import
numpy
as
np
import
timeit
import
gurobipy
as
gp
from
gurobipy
import
GRB
def
addvariables
(
Z
):
upperbounds
=
[]
I
=
[]
for
M
in
database_indices
:
m
=
len
(
data
[
'database_ncharges'
][
M
])
I
=
I
+
[(
i
,
j
,
M
)
for
i
in
range
(
m
)
for
j
in
range
(
n
)]
upperbounds
.
append
(
int
(
n
/
m
))
x
=
Z
.
addVars
(
I
,
vtype
=
GRB
.
BINARY
)
# dummy variables y associated to number of times molecule M is picked
y
=
Z
.
addVars
(
database_indices
,
vtype
=
"I"
,
lb
=
0
,
ub
=
upperbounds
)
print
(
"Variables added."
)
return
x
,
y
,
I
def
addconstraints
(
Z
,
x
,
y
):
# injection into [n]
Z
.
addConstrs
(
x
.
sum
(
'*'
,
j
,
'*'
)
<=
1
for
j
in
range
(
n
))
# use at least 80% of target
Z
.
addConstr
(
x
.
sum
()
>=
0.8
*
n
)
# additional constraints: take whole molecule or leave it out, and matching charges
for
M
in
database_indices
:
CM
=
data
[
'database_ncharges'
][
M
]
m
=
len
(
CM
)
# the number of indices of M used (counting multiple use) is at least 55% of its size
Z
.
addConstr
(
x
.
sum
(
'*'
,
'*'
,
M
)
>=
0.55
*
m
*
y
[
M
])
# each index in M is used at most y[M] times
Z
.
addConstrs
(
x
.
sum
(
i
,
'*'
,
M
)
<=
y
[
M
]
for
i
in
range
(
m
))
# ignore incompatible charges and atoms of charge 1
for
i
in
range
(
m
):
for
j
in
range
(
n
):
if
(
CM
[
i
]
!=
CT
[
j
]):
#if(CM[i] != CT[j] or CM[i]==1):
Z
.
addConstr
(
x
[
i
,
j
,
M
]
==
0
)
print
(
"Constraints added."
)
return
0
def
setobjective
(
Z
,
x
,
y
):
expr
=
gp
.
QuadExpr
()
print
(
"Constructing objective function... "
)
key
=
0
for
M
in
database_indices
:
key
=
key
+
1
Mol
=
data
[
'database_CMs'
][
M
]
m
=
len
(
Mol
)
expr
.
addTerms
(
-
m
,
y
[
M
])
for
i
in
range
(
m
):
for
j
in
range
(
m
):
for
k
in
range
(
n
):
for
l
in
range
(
k
,
n
):
expr
.
add
(
x
[
i
,
k
,
M
]
*
x
[
j
,
l
,
M
],
np
.
abs
(
T
[
k
,
l
]
-
Mol
[
i
,
j
])
**
2
)
print
(
key
,
" / "
,
size_database
)
Z
.
setObjective
(
expr
,
GRB
.
MINIMIZE
)
print
(
"Objective function set."
)
return
0
# prints mappings of positions (indices+1) of each molecule (before preprocess) to positions inside target (before preprocess, but the hydrogens are at the end anyway)
def
print_sols
(
Z
,
x
,
y
):
SolCount
=
Z
.
SolCount
for
solnb
in
range
(
SolCount
):
print
()
print
(
"--------------------------------"
)
Z
.
setParam
(
"SolutionNumber"
,
solnb
)
print
(
"Solution number"
,
solnb
+
1
,
", objective value"
,
Z
.
PoolObjVal
)
for
M
in
database_indices
:
amount_picked
=
int
(
np
.
rint
(
y
[
M
]
.
Xn
))
if
amount_picked
!=
0
:
m
=
len
(
data
[
'database_ncharges'
][
M
])
U
=
np
.
zeros
((
m
,
amount_picked
))
# constructing U
for
i
in
range
(
m
):
k
=
0
for
j
in
range
(
n
):
if
x
[
i
,
j
,
M
]
.
Xn
==
1
and
sum
(
U
[:,
k
]
!=
0
)
<
0.55
*
m
:
U
[
i
,
k
]
=
j
+
1
k
=
k
+
1
# reading U
for
k
in
range
(
amount_picked
):
if
np
.
any
(
U
[:,
k
]
!=
0
):
if
k
==
0
:
print
(
"Molecule"
,
data
[
'database_labels'
][
M
],
"has been picked"
,
amount_picked
,
"time(s) ( size"
,
len
(
data
[
'database_ncharges'
][
M
]),
", used"
,
sum
([
x
[
i
,
j
,
M
]
.
Xn
for
i
in
range
(
m
)
for
j
in
range
(
n
)]),
")"
)
print
(
k
+
1
,
end
=
": "
)
for
i
in
range
(
m
):
if
U
[
i
,
k
]
!=
0
:
print
(
oldindex
(
i
,
M
)
+
1
,
"->"
,
U
[
i
,
k
],
end
=
", "
)
print
(
"used"
,
sum
(
U
[:,
k
]
!=
0
))
# converts new index (with hydrogens removed) to old index.
def
oldindex
(
i
,
M
):
k
=
0
notones
=
0
CM
=
olddata
[
'database_ncharges'
][
M
]
while
notones
<
i
or
CM
[
k
]
==
1
:
if
CM
[
k
]
!=
1
:
notones
=
notones
+
1
k
=
k
+
1
return
k
def
main
():
# construction of the model
start
=
timeit
.
default_timer
()
Z
=
gp
.
Model
()
Z
.
setParam
(
'OutputFlag'
,
1
)
x
,
y
,
I
=
addvariables
(
Z
)
addconstraints
(
Z
,
x
,
y
)
setobjective
(
Z
,
x
,
y
)
stop
=
timeit
.
default_timer
()
print
(
"Model setup: "
,
stop
-
start
,
"s"
)
# optimization
# trying parameters from Z.tune() and others.
Z
.
setParam
(
"DegenMoves"
,
2
)
Z
.
setParam
(
"ZeroHalfCuts"
,
2
)
Z
.
setParam
(
"BQPCuts"
,
2
)
Z
.
setParam
(
"RLTCuts"
,
2
)
# time (seconds) and gap (%) limit, (max) number of solutions kept in memory
Z
.
setParam
(
"TimeLimit"
,
180
)
Z
.
setParam
(
"MIPGap"
,
0.33
)
# 1.5-approx. algorithm (not really)
Z
.
setParam
(
"PoolSolutions"
,
10
)
print
(
"------------"
)
print
(
"Optimization"
)
print
(
"------------"
)
Z
.
optimize
()
print
(
"Optimization runtime: "
,
Z
.
RunTime
,
"s"
)
if
(
Z
.
status
==
3
):
print
(
"Model was proven to be infeasible."
)
return
1
print_sols
(
Z
,
x
,
y
)
return
0
# global constants
data
=
np
.
load
(
"datafilter.npz"
,
allow_pickle
=
True
)
olddata
=
np
.
load
(
"data.npz"
,
allow_pickle
=
True
)
target_index
=
1
print
(
"Target"
,
data
[
'target_labels'
][
target_index
])
CT
=
data
[
'target_ncharges'
][
target_index
]
T
=
data
[
'target_CMs'
][
target_index
]
n
=
len
(
data
[
'target_ncharges'
][
target_index
])
#database_indices=np.loadtxt("filtered"+str(target_index), dtype=int)
#database_indices=database_indices[:100]
database_indices
=
range
(
500
)
size_database
=
len
(
database_indices
)
main
()
Event Timeline
Log In to Comment