facetnet.py
No OneTemporary
Actions

Subscribers

None

File Metadata

Created: Fri, May 17, 08:55

facetnet.py
View Options

	import numpy as np
	import matplotlib.pyplot as plt
	import pandas as pd
	# from sklearn.metrics import mutual_info_score


	def soft_moduarity(wc, dc_inv, xc, lc):
	# Calculate the soft modularity as defined by equation (7)

	temp = np.linalg.multi_dot([dc_inv, xc, lc])
	n = wc.shape[0]

	temp1 = np.linalg.multi_dot([temp.T, wc, temp])
	ones = np.ones((1, n))
	onesT = ones.T
	temp2 = np.linalg.multi_dot([np.ones((1, n)), wc.T, temp, temp.T, wc, np.ones((n, 1))])

	qc_s = np.trace(np.linalg.multi_dot([temp.T, wc, temp])) - np.linalg.multi_dot([np.ones((1, n)), wc.T, temp, temp.T, wc, np.ones((n, 1))])
	# Equivalent:
	# qc_s = np.trace(temp.T.dot(wc).dot(temp)) - np.ones((1, n)).dot(wc.T).dot(temp).dot(temp.T).dot(wc).dot(np.ones((n, 1)))

	return qc_s[0, 0]


	def soft_modularity_alt(soft_comm, W):
	N = W.shape[0]
	ret = np.trace(soft_comm.TWsoft_comm)
	one = np.matrix(np.ones((N,1)))
	ret -= np.array(one.TW.Tsoft_commsoft_comm.TW*one).squeeze()
	return ret


	def similarity(adj_mat, weighted, gamma=0.2):
	# calculate a similarity matrix from the adjacency matrix, basically an exponential scaling

	n, m = adj_mat.shape
	sim = np.zeros(adj_mat.shape)
	if weighted:
	for i in range(n):
	for j in range(m):
	if adj_mat[i, j] != 0:
	sim[i, j] = np.exp(-1.0/(gamma * adj_mat[i, j]))
	else:
	sim[i, j] = 0.0
	else:
	sim = adj_mat / 2.0
	for i in range(n):
	# populate the diagonal
	sim[i, i] = 1.0
	sim /= np.sum(sim)
	return sim


	def normalize_rows(w: np.ndarray):
	# normalize matrix w row-wise
	row_norm = np.linalg.norm(w, ord=1, axis=1)
	for i in range(w.shape[0]):
	if row_norm[i] != 0:
	w[i, :] /= row_norm[i]
	return w


	def normalize_cols(xc: np.ndarray):
	# normalize matrix xc column-wise
	col_norm = np.linalg.norm(xc, ord=1, axis=0)
	for j in range(xc.shape[1]):
	if col_norm[j] != 0:
	xc[:, j] /= col_norm[j]
	return xc


	def kl_divergence(a_mat, b_mat):
	# Kullback-Leibler divergence for all non zero elements of a_mat and b_mat

	assert a_mat.shape == b_mat.shape

	n, m = a_mat.shape
	res = 0.0
	for i in range(n):
	for j in range(m):
	if a_mat[i, j] > 1e-20 and b_mat[i, j] > 1e-20:
	res += a_mat[i, j] * np.log(a_mat[i, j] / b_mat[i, j]) - a_mat[i, j] + b_mat[i, j]
	return res


	def param_update(yc, wc, alpha, is_plot=False):
	# Update function for xc (capital X and capital L), implementation of equations (4) and (5)

	n, m = yc.shape

	# xc_old = np.random.rand(xc.shape[0], xc.shape[1])
	xc_old = np.random.rand(n, m)
	xc_old = normalize_cols(xc_old)

	# lc_old = lc_new
	lc_old = np.diag(np.random.rand(m))
	lc_old /= np.trace(lc_old)

	eps = 1e-5
	max_iter = int(1e3)
	gamma = np.zeros(max_iter)

	for it in range(max_iter):
	wc_approx = xc_old.dot(lc_old.dot(xc_old.T))
	xc_new = np.zeros(xc_old.shape)
	lc_new = np.zeros(lc_old.shape)

	for k in range(m):
	for i in range(n):
	for j in range(n):
	if wc[i, j] != 0: # avoid divisions by zero in wc_approx
	xc_new[i, k] += wc[i, j] * lc_old[k, k] * xc_old[j, k] / wc_approx[i, j]
	lc_new[k, k] += wc[i, j] * xc_old[i, k] * xc_old[j, k] / wc_approx[i, j]
	xc_new[i, k] = (2 alpha * xc_old[i, k])
	xc_new[i, k] += (1 - alpha) * yc[i, k]
	lc_new[k, k] = (alpha lc_old[k, k])
	lc_new[k, k] += (1 - alpha) * sum(yc[:, k])
	xc_new = normalize_cols(xc_new)
	lc_new /= np.trace(lc_new)

	gamma[it] = alpha * kl_divergence(wc, np.linalg.multi_dot([xc_new, lc_new, xc_new.T])) + (1 - alpha) * kl_divergence(yc, xc_new.dot(lc_new))

	if it == 0:
	gamma_min = gamma[it]
	else:
	if abs((gamma[it] - gamma_min)) / gamma_min < eps:
	if is_plot:
	plt.plot(gamma[0:it])
	plt.ylabel(r"$\gamma$")
	plt.xlabel("# iter")
	plt.show()
	print("====> no iter: {}".format(it))
	return xc_res, lc_res

	if gamma[it] < gamma[it - 1]:
	gamma_min = gamma[it]
	xc_res = xc_new
	lc_res = lc_new

	else:
	if is_plot:
	plt.plot(gamma[0:it])
	plt.ylabel(r"$\gamma$")
	plt.xlabel("# iter")
	plt.show()
	print("====> no iter: {}".format(it))
	return xc_res, lc_res

	xc_old = xc_new
	lc_old = lc_new
	raise Exception('Maximum iteration number reached: {}'.format(max_iter))


	def read_edge_list(filename, weighted=False):
	print("Reading edge file {}".format(filename))
	idmap = set()
	edge_cache = {}
	with open(filename) as f:
	for line in f:
	if weighted:
	u, v, w = [int(x) for x in line.strip().split()]
	else:
	tmp = [int(x) for x in line.strip().split()]
	u, v, w = tmp[0], tmp[1], 1.0
	edge_cache[(u, v)] = w
	idmap.add(u)
	idmap.add(v)
	idmap = list(idmap)
	idmap_inv = {nid: i for i, nid in enumerate(idmap)}
	n = len(idmap)
	adj_mat = np.zeros((n, n))
	for (u, v), w in edge_cache.items():
	adj_mat[idmap_inv[u], idmap_inv[v]] = w
	adj_mat += adj_mat.T
	return idmap, idmap_inv, adj_mat


	def facetnet_evolution(net_path, alpha, t_steps, m, show_plot=False):
	"""
	Applies facetNet algorithm on an entire set of consecutively named edgelist.X files. Can deal with appearing and
	disappearing number of nodes. Establishes soft community memberships to an a priori given number of communities (m)..

	:param net_path: Path to edge-lists
	:param alpha: Alpha cost weight
	:param t_steps: Total number of time-steps to be computed (must be at least # of edge-list files)
	:param m: Number of (a priori) communities
	:param show_plot: Bool, show convergence plot of gamma
	:return: No return value, saves soft_comm as soft_comm_alpha0.X_nwX.csv files in net_path folder
	"""

	clusters = []
	for i in range(m):
	clusters.append("cluster_{}".format(i))

	for t in range(t_steps):
	print("time-step:", t)
	idmap, idmap_inv, adj_mat = read_edge_list(net_path + "/%d.edgelist" % t, weighted=True)
	n = len(idmap)
	# wc = normalize_rows(adj_mat)
	wc = adj_mat / adj_mat.sum()

	print("Calculate network representation ...")
	if t == 0:
	xc = np.random.rand(n, m)
	xc = normalize_cols(xc)
	lc = np.diag(np.random.rand(m))
	lc /= np.trace(lc)
	yc = xc.dot(lc)
	xc, lc, = param_update(yc, wc, 1.0, show_plot)

	else:
	reserved_rows = [idmap_inv0[x] for x in idmap0 if x in idmap]
	num_new = len(set(idmap) - set(idmap0))
	num_old = len(reserved_rows)

	xc = xc[reserved_rows, :]
	xc = normalize_cols(xc)
	xc *= num_old / (num_old + num_new)
	if num_new != 0:
	xc = np.pad(xc, ((0, num_new), (0, 0)), mode='constant', constant_values=(1 / num_new, 1 / num_new))
	yc = xc.dot(lc)
	xc, lc, = param_update(yc, wc, alpha, show_plot)

	yc = xc.dot(lc)

	dc_inv = np.zeros(n)
	for i in range(n):
	dc_inv[i] = 1 / np.sum(yc[i, :])
	dc_inv = np.matrix(np.diag(dc_inv))

	soft_comm = dc_inv.dot(yc)

	df = pd.DataFrame(data=soft_comm, columns=clusters)
	df.insert(0, column="id", value=idmap)
	df.to_csv("{}/soft_comm_alpha{}_nw{}.csv".format(net_path, alpha, t), index=False)

	idmap0 = idmap
	idmap_inv0 = idmap_inv


	def facetnet_step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None):
	"""
	Applies one step of the facetNet algorithm, equivalent to a single step of facetnet_evolution

	:param xc_prev: X(t-1) matrix
	:param lc_prev: LAMBDA(t-1) matrix
	:param idmap0: previous id map
	:param idmap_inv0: inverse previous idmap
	:param edgelist_path: Path to current edgelist
	:param alpha: Alpha cost weight
	:param weighted: True if W is weighted, False if it is binary
	:param m: Number of (a priori) communities
	:param show_plot: Bool, show convergence plot of gamma
	:return: idmap, idmap_inv, xc, lc, soft_comm_df
	"""

	clusters = []
	for i in range(m):
	clusters.append("cluster_{}".format(i))

	idmap, idmap_inv, adj_mat = read_edge_list(edgelist_path, weighted)
	n = len(idmap)
	# wc = normalize_rows(adj_mat)
	wc = adj_mat / adj_mat.sum()

	print("Calculate network representation ...")
	if xc_prev is None:
	print("This is an initilaizing step (t = 0)")
	xc = np.random.rand(n, m)
	xc = normalize_cols(xc)
	lc = np.diag(np.random.rand(m))
	lc /= np.trace(lc)
	yc = xc.dot(lc)
	xc, lc, = param_update(yc, wc, 1.0, show_plot)

	else:
	reserved_rows = [idmap_inv0[x] for x in idmap0 if x in idmap]
	num_new = len(set(idmap) - set(idmap0))
	num_old = len(reserved_rows)

	xc = xc_prev[reserved_rows, :]
	xc = normalize_cols(xc)
	xc *= num_old / (num_old + num_new)
	if num_new != 0:
	xc = np.pad(xc, ((0, num_new), (0, 0)), mode='constant', constant_values=(1 / num_new, 1 / num_new))
	yc = xc.dot(lc_prev)
	xc, lc, = param_update(yc, wc, alpha, show_plot)

	yc = xc.dot(lc)

	dc_inv = np.zeros(n)
	for i in range(n):
	dc_inv[i] = 1 / np.sum(yc[i, :])
	dc_inv = np.matrix(np.diag(dc_inv))

	soft_comm = dc_inv.dot(yc)

	comm_net = np.linalg.multi_dot([lc, xc.T, soft_comm])
	if xc_prev is None:
	evol_net = None
	else:
	evol_net = np.linalg.multi_dot([lc_prev, xc_prev.T, soft_comm])

	qc_s = soft_moduarity(wc, dc_inv, xc, lc)
	qc_s_alt = soft_modularity_alt(soft_comm, wc)
	print("Soft comm alt = {}".format(qc_s_alt))

	soft_comm_df = pd.DataFrame(data=soft_comm, columns=clusters)
	soft_comm_df.insert(0, column="id", value=idmap)

	comm_net_df = pd.DataFrame(data=comm_net, columns=clusters)

	evol_net_df = pd.DataFrame(data=evol_net, columns=clusters)

	return idmap, idmap_inv, xc, lc, qc_s, soft_comm_df, comm_net_df, evol_net_df

facetnet.pyNo OneTemporaryActions

File Metadata

facetnet.pyView Options

Event Timeline

facetnet.py
No OneTemporary
Actions

facetnet.py
View Options