Page MenuHomec4science

onepass.py
No OneTemporary

File Metadata

Created
Sat, Apr 27, 09:51

onepass.py

import numpy as np
import timeit
import gurobipy as gp
from gurobipy import GRB
def addvariables(Z):
upperbounds=[]
I=[]
J=[]
for M in database_indices:
m=len(data['database_ncharges'][M])
CM=data['database_ncharges'][M]
adjM=connectivity_data['frag_adj_matrices'][M]
I=I+[(i,j,M) for i in range(m) for j in range(n) if CM[i] == CT[j]] # if condition excludes j; i always takes all m values
J=J+[(i,j,M) for i in range(m) for j in range(i+1,m) if adjM[i,j]]
upperbounds.append(int(n/m))
x=Z.addVars(I, vtype=GRB.BINARY)
e=Z.addVars(J, vtype=GRB.BINARY)
# dummy variables y associated to number of times molecule M is picked
# temporarily set binary
y=Z.addVars(database_indices, vtype="I", lb=0, ub=upperbounds)
print("Variables added.")
return x,y,e,I,J
def addconstraints(Z,x,y,e,I,J):
# bijection into [n]
Z.addConstrs(x.sum('*',j,'*') == 1 for j in range(n))
# additional constraints: take whole molecule or leave it out, and matching charges
for M in database_indices:
CM=data['database_ncharges'][M]
m=len(CM)
# the number of indices of M used (counting multiple use) is at least mol_percent of its size
Z.addConstr(x.sum('*','*',M) >= mol_percent*m*y[M])
# each index in M is used at most y[M] times
Z.addConstrs(x.sum(i,'*',M) <= y[M] for i in range(m))
# forces #edges >= #nodes - 1, which must be true for any connected graph
expr=gp.LinExpr()
for (i,j) in [u[:2] for u in J if u[2]==M]:
Z.addConstr(e[i,j,M] <= x.sum(i,'*',M))
Z.addConstr(e[i,j,M] <= x.sum(j,'*',M))
#Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)+x.sum(j,'*',M) - 1)
d=Z.addVar(vtype="B") # decision variable for minimum
Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)-d*4)
Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)-(1-d)*4)
expr += e[i,j,M]
for (i,j) in [v[:2] for v in I if v[2]==M]:
expr -= x[i,j,M]
Z.addConstr(expr+y[M] >= 0)
print("Constraints added.")
return 0
def setobjective(Z,x,y,I):
expr=gp.QuadExpr()
print("Constructing objective function... ")
key=0
for M in database_indices:
key=key+1
Mol=data['database_CMs'][M]
m=len(Mol)
expr.addTerms(-1, y[M])
adjM=connectivity_data['frag_adj_matrices'][M]
for (i,k) in [v[:2] for v in I if v[2]==M]:
for (j,l) in [v[:2] for v in I if v[2]==M and v[1]>k]:
expr.add(x[i,k,M] * x[j,l,M], np.abs(T[k,l]-Mol[i,j])**2)
Z.addConstr(x[i,k,M]+x[j,l,M]-1 <= (adjM[i,j] == adjT[k,l]))
print(key, " / ", size_database)
Z.setObjective(expr, GRB.MINIMIZE)
print("Objective function set.")
return 0
# prints mappings of positions (indices+1) of each molecule (before preprocess) to positions inside target (before preprocess, but the hydrogens are at the end anyway)
def print_sols(Z, x, y, I):
SolCount=Z.SolCount
print("Target has size", n)
for solnb in range(SolCount):
print()
print("--------------------------------")
Z.setParam("SolutionNumber",solnb)
print("Solution number", solnb+1, ", objective value", Z.PoolObjVal)
for M in database_indices:
amount_picked=int(np.rint(y[M].Xn))
if amount_picked != 0:
m=len(data['database_ncharges'][M])
label=data['database_labels'][M]
U=np.zeros((m,amount_picked))
# constructing U
for i in range(m):
k=0
for j in [v[1] for v in I if v[0]==i and v[2]==M]:
if np.rint(x[i,j,M].Xn)==1 and sum(U[:,k]!=0) < mol_percent*m:
U[i,k]=j+1
k=k+1
# reading U
for k in range(amount_picked):
if np.any(U[:,k] != 0):
if k==0:
print("Molecule", label, "has been picked", amount_picked, "time(s) ( size", len(data['database_ncharges'][M]), ", used", sum([x[v].Xn for v in I if v[2]==M]), ")")
print(k+1, end=": ")
for i in range(m):
if U[i,k]!=0:
print(oldindex(i,M)+1, "->", U[i,k], end=", ")
print("used", sum(U[:,k]!=0))
# converts new index (with hydrogens removed) to old index.
# actually the hydrogens are always at the end so new index = old index
def oldindex(i, M):
"""
k=0
notones=0
CM=olddata['database_ncharges'][M]
while notones<i or CM[k]==1:
if CM[k]!=1:
notones=notones+1
k=k+1
return k
"""
return i
def main():
# construction of the model
start=timeit.default_timer()
Z = gp.Model()
Z.setParam('OutputFlag',1)
x,y,e,I,J=addvariables(Z)
addconstraints(Z,x,y,e,I,J)
setobjective(Z,x,y,I)
stop=timeit.default_timer()
print("Model setup: ", stop-start, "s")
# optimization
# trying parameters from Z.tune() and others.
Z.setParam("DegenMoves", 2)
Z.setParam("ZeroHalfCuts", 2)
Z.setParam("BQPCuts", 2)
Z.setParam("RLTCuts", 2)
# time (seconds) and gap (%) limit, (max) number of solutions kept in memory
Z.setParam("TimeLimit", 500)
Z.setParam("MIPGap", 0.33) # 1.5-approx. algorithm (not really)
Z.setParam("PoolSolutions", 5)
print("------------")
print("Optimization")
print("------------")
Z.optimize()
print("Optimization runtime: ", Z.RunTime, "s")
if(Z.status == 3):
print("Model was proven to be infeasible.")
return 1
print_sols(Z,x,y,I)
return 0
# global settings modifiable
target_index=1
mol_percent=0.55 # portion of each molecule to use if taken
database_indices=range(1550,1750) # consider only first 500 molecules
# global constants
connectivity_data = np.load("connectivity_data.npz", allow_pickle=True)
data=np.load("datafilter.npz", allow_pickle=True)
olddata=np.load("data.npz", allow_pickle=True)
CT=data['target_ncharges'][target_index]
T=data['target_CMs'][target_index]
n=len(data['target_ncharges'][target_index])
size_database=len(database_indices)
adjT=connectivity_data['target_adj_matrices'][target_index]
main()
#print(connectivity_data['frag_adj_matrices'][0][0,1])

Event Timeline