diff --git a/connectivity_datafilter.npz b/connectivity_datafilter.npz new file mode 100644 index 0000000..89b1385 Binary files /dev/null and b/connectivity_datafilter.npz differ diff --git a/onepass.py b/onepass.py index aa9a9b5..8c2af0e 100644 --- a/onepass.py +++ b/onepass.py @@ -1,184 +1,182 @@ import numpy as np import timeit import gurobipy as gp from gurobipy import GRB def addvariables(Z): upperbounds=[] I=[] J=[] for M in database_indices: m=len(data['database_ncharges'][M]) CM=data['database_ncharges'][M] adjM=connectivity_data['frag_adj_matrices'][M] I=I+[(i,j,M) for i in range(m) for j in range(n) if CM[i] == CT[j]] # if condition excludes j; i always takes all m values J=J+[(i,j,M) for i in range(m) for j in range(i+1,m) if adjM[i,j]] - upperbounds.append(int(n/m)) - + upperbounds.append(int(n/(mol_percent*m))) + x=Z.addVars(I, vtype=GRB.BINARY) e=Z.addVars(J, vtype=GRB.BINARY) # dummy variables y associated to number of times molecule M is picked - # temporarily set binary y=Z.addVars(database_indices, vtype="I", lb=0, ub=upperbounds) print("Variables added.") return x,y,e,I,J def addconstraints(Z,x,y,e,I,J): # bijection into [n] Z.addConstrs(x.sum('*',j,'*') == 1 for j in range(n)) # additional constraints: take whole molecule or leave it out, and matching charges for M in database_indices: CM=data['database_ncharges'][M] m=len(CM) # the number of indices of M used (counting multiple use) is at least mol_percent of its size Z.addConstr(x.sum('*','*',M) >= mol_percent*m*y[M]) # each index in M is used at most y[M] times Z.addConstrs(x.sum(i,'*',M) <= y[M] for i in range(m)) # forces #edges >= #nodes - 1, which must be true for any connected graph expr=gp.LinExpr() for (i,j) in [u[:2] for u in J if u[2]==M]: Z.addConstr(e[i,j,M] <= x.sum(i,'*',M)) Z.addConstr(e[i,j,M] <= x.sum(j,'*',M)) #Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)+x.sum(j,'*',M) - 1) d=Z.addVar(vtype="B") # decision variable for minimum Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)-d*4) Z.addConstr(e[i,j,M] >= x.sum(i,'*',M)-(1-d)*4) expr += e[i,j,M] for (i,j) in [v[:2] for v in I if v[2]==M]: expr -= x[i,j,M] Z.addConstr(expr+y[M] >= 0) print("Constraints added.") return 0 def setobjective(Z,x,y,I): expr=gp.QuadExpr() print("Constructing objective function... ") key=0 for M in database_indices: key=key+1 Mol=data['database_CMs'][M] m=len(Mol) expr.addTerms(-1, y[M]) adjM=connectivity_data['frag_adj_matrices'][M] for (i,k) in [v[:2] for v in I if v[2]==M]: for (j,l) in [v[:2] for v in I if v[2]==M and v[1]>k]: expr.add(x[i,k,M] * x[j,l,M], np.abs(T[k,l]-Mol[i,j])**2) Z.addConstr(x[i,k,M]+x[j,l,M]-1 <= (adjM[i,j] == adjT[k,l])) print(key, " / ", size_database) Z.setObjective(expr, GRB.MINIMIZE) print("Objective function set.") return 0 # prints mappings of positions (indices+1) of each molecule (before preprocess) to positions inside target (before preprocess, but the hydrogens are at the end anyway) def print_sols(Z, x, y, I): SolCount=Z.SolCount print("Target has size", n) for solnb in range(SolCount): print() print("--------------------------------") Z.setParam("SolutionNumber",solnb) print("Solution number", solnb+1, ", objective value", Z.PoolObjVal) for M in database_indices: amount_picked=int(np.rint(y[M].Xn)) if amount_picked != 0: m=len(data['database_ncharges'][M]) label=data['database_labels'][M] U=np.zeros((m,amount_picked)) # constructing U for i in range(m): k=0 for j in [v[1] for v in I if v[0]==i and v[2]==M]: if np.rint(x[i,j,M].Xn)==1 and sum(U[:,k]!=0) < mol_percent*m: U[i,k]=j+1 k=k+1 # reading U for k in range(amount_picked): if np.any(U[:,k] != 0): if k==0: - print("Molecule", label, "has been picked", amount_picked, "time(s) ( size", len(data['database_ncharges'][M]), ", used", sum([x[v].Xn for v in I if v[2]==M]), ")") + print("Molecule", label, "has been picked", amount_picked, "time(s) ( size", m, ", used", sum([x[v].Xn for v in I if v[2]==M]), ")") print(k+1, end=": ") for i in range(m): if U[i,k]!=0: print(oldindex(i,M)+1, "->", U[i,k], end=", ") print("used", sum(U[:,k]!=0)) # converts new index (with hydrogens removed) to old index. # actually the hydrogens are always at the end so new index = old index def oldindex(i, M): """ k=0 notones=0 CM=olddata['database_ncharges'][M] while notones