diff --git a/facetnet.py b/facetnet.py index c74b3b3..0e9d084 100644 --- a/facetnet.py +++ b/facetnet.py @@ -1,425 +1,440 @@ """ Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019 Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python) Copyright 2019 __UNIL__. All rights reserved. This file is part of facetnet-python-unil. facetnet-python-unil is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. facetnet-python-unil is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with facetnet-python-unil. If not, see . """ +import logging +from tqdm import tqdm import numpy as np import matplotlib.pyplot as plt import seaborn as sns # from sklearn.metrics import mutual_info_score def soft_modularity(wc, dc_inv, xc, lc): # Calculate the soft modularity as defined by equation (7) temp = np.linalg.multi_dot([dc_inv, xc, lc]) n = wc.shape[0] qc_s = np.trace(np.linalg.multi_dot([temp.T, wc, temp])) - np.linalg.multi_dot( [np.ones((1, n)), wc.T, temp, temp.T, wc, np.ones((n, 1))]) return qc_s[0, 0] def soft_modularity_alt(soft_comm, W): # from the original facetnet-python project N = W.shape[0] ret = np.trace(soft_comm.T*W*soft_comm) one = np.matrix(np.ones((N, 1))) ret -= np.array(one.T*W.T*soft_comm*soft_comm.T*W*one).squeeze() return ret def similarity(adj_mat, weighted, gamma=0.2): # calculate a similarity matrix from the adjacency matrix, basically an exponential scaling (see paper sec 4.2) n, m = adj_mat.shape sim = np.zeros(adj_mat.shape) if weighted: for i in range(n): for j in range(m): if adj_mat[i, j] != 0: sim[i, j] = np.exp(-1.0/(gamma * adj_mat[i, j])) else: sim[i, j] = 0.0 else: sim = adj_mat / 2.0 for i in range(n): sim[i, i] = 1.0 sim /= np.sum(sim) return sim def normalize_rows(w: np.ndarray): # normalize matrix w row-wise row_norm = np.linalg.norm(w, ord=1, axis=1) for i in range(w.shape[0]): if row_norm[i] != 0: w[i, :] /= row_norm[i] return w def normalize_cols(xc: np.ndarray): # normalize matrix xc column-wise col_norm = np.linalg.norm(xc, ord=1, axis=0) for j in range(xc.shape[1]): if col_norm[j] != 0: xc[:, j] /= col_norm[j] return xc def kl_divergence(a_mat, b_mat): # Kullback-Leibler divergence for all non zero elements of a_mat and b_mat assert a_mat.shape == b_mat.shape n, m = a_mat.shape res = 0.0 for i in range(n): for j in range(m): if a_mat[i, j] > 1e-20 and b_mat[i, j] > 1e-20: res += a_mat[i, j] * np.log(a_mat[i, j] / b_mat[i, j]) - a_mat[i, j] + b_mat[i, j] return res def dc_inverse(yc, n): # dc[i, i] = sum_j{[xc * lc]_ij}, see paragraph 2.5.1 dc_inv = np.zeros(n) for i in range(n): dc_inv[i] = 1 / np.sum(yc[i, :]) dc_inv = np.matrix(np.diag(dc_inv)) return dc_inv def initialize_cover(n, m): # Initializing for matrices xc and lc at time step t = 0 - print("Initialization step (t = 0)") + logging.info("Initialization step (t = 0)") xc = np.random.rand(n, m) xc = normalize_cols(xc) lc = np.diag(np.random.rand(m)) lc /= np.trace(lc) yc = xc.dot(lc) return yc def compensate_node_demography(xc, idmap0, idmap, idmap_inv0): # Account for demographic changes in the nodes list (see paper paragraph 3.1) reserved_rows = [idmap_inv0[x] for x in idmap0 if x in idmap] num_new = len(set(idmap) - set(idmap0)) num_old = len(reserved_rows) xc = xc[reserved_rows, :] xc = normalize_cols(xc) xc *= num_old / (num_old + num_new) if num_new != 0: xc = np.pad(xc, ((0, num_new), (0, 0)), mode='constant', constant_values=(0.0, 0.0)) return xc def update_cover(yzc, wc, alpha, mvarmax=None, is_plot=False): """ Update function for xc (capital X and capital L) with static or variable community number. Implementation of equations (4) and (5) ((9) and (10) respectively). If alpha = 1.0 (i.e. ignoring yc when calculating the community representation), yc is only used to determine (n, m) but its values are ignored (yc is to be filled with NaN). :param yzc: xc(t-1) * lc(t-1) (xc(t-1) * lc(t-1) * xc(t-1).T respectively), see paragraph 2.2.2 (3.2.2) :param wc: adjacency matrix, needs to be normalized with total matrix sum, see paragraph 4.2 :param alpha: snap-shot vs. temporal cost weight, see paragraph 2.2 :param mvarmax: None if m (community number) is fixed between timesteps, m_max otherwise :param is_plot: if True, a convergence plot is shown :return: xc_res, lc_res """ if mvarmax is None: n, m = yzc.shape else: n = wc.shape[0] m = mvarmax xc_old = np.random.rand(n, m) xc_old = normalize_cols(xc_old) lc_old = np.diag(np.random.rand(m)) lc_old /= np.trace(lc_old) eps = 1e-5 # see paragraph 4.1.2 max_iter = int(1e3) gamma = np.zeros(max_iter) if mvarmax is not None: cs = np.zeros(max_iter) ct = np.zeros(max_iter) for it in range(max_iter): wc_approx = np.linalg.multi_dot([xc_old, lc_old, xc_old.T]) xc_new = np.zeros(xc_old.shape) lc_new = np.zeros(lc_old.shape) for k in range(m): if mvarmax is None: for i in range(n): for j in range(n): if wc[i, j] != 0: # avoid divisions by zero in wc_approx xc_new[i, k] += wc[i, j] * lc_old[k, k] * \ xc_old[j, k] / wc_approx[i, j] lc_new[k, k] += wc[i, j] * xc_old[i, k] * \ xc_old[j, k] / wc_approx[i, j] xc_new[i, k] *= (2 * alpha * xc_old[i, k]) xc_new[i, k] += (1 - alpha) * yzc[i, k] lc_new[k, k] *= (alpha * lc_old[k, k]) lc_new[k, k] += (1 - alpha) * sum(yzc[:, k]) else: for i in range(n): for j in range(n): xc_new[i, k] += (alpha * wc[i, j] + (1 - alpha) * yzc[i, j]) * \ lc_old[k, k] * xc_old[j, k] / wc_approx[i, j] lc_new[k, k] += (alpha * wc[i, j] + (1 - alpha) * yzc[i, j]) * \ xc_old[i, k] * xc_old[j, k] / wc_approx[i, j] xc_new[i, k] *= xc_old[i, k] lc_new[k, k] *= lc_old[k, k] xc_new = normalize_cols(xc_new) lc_new /= np.trace(lc_new) if mvarmax is None: gamma[it] = alpha * kl_divergence(wc, np.linalg.multi_dot( [xc_new, lc_new, xc_new.T])) + (1 - alpha) * kl_divergence(yzc, xc_new.dot(lc_new)) else: temp = np.linalg.multi_dot([xc_new, lc_new, xc_new.T]) cs[it] = kl_divergence(wc, temp) ct[it] = kl_divergence(yzc, temp) gamma[it] = alpha * cs[it] + (1 - alpha) * ct[it] if it == 0: gamma_min = gamma[it] else: if abs((gamma[it] - gamma_min)) / gamma_min < eps: if is_plot: plt.plot(gamma[0:it]) plt.ylabel(r"$\gamma$") plt.xlabel("# iter") plt.title("# communities: {}".format(m)) - cg = sns.clustermap(wc, figsize=(12, 12), cmap='hot') - perm = cg.data2d.index.values - temp = np.linalg.multi_dot([xc_new, lc_new, xc_new.T]) - temp = temp[perm, :] - temp = temp[:, perm] - fig, ax = plt.subplots(figsize=(12, 12)) - ax.matshow(temp, cmap='hot') - ax.set_xticklabels(perm) - ax.set_yticklabels(perm) plt.show() - print("====> no iter: {}".format(it)) + logging.info("converged after no iter: {}".format(it)) return xc_res, lc_res if gamma[it] < gamma[it - 1]: gamma_min = gamma[it] xc_res = xc_new lc_res = lc_new elif it > 2: # cost function starts to diverge after reaching local minimum if is_plot: plt.plot(gamma[0:it]) plt.ylabel(r"$\gamma$") plt.xlabel("# iter") plt.title("# communities: {}".format(m)) plt.show() - print("====> no iter: {}".format(it)) + logging.info("converged after no iter: {}".format(it)) return xc_res, lc_res xc_old = xc_new lc_old = lc_new - raise Exception('Maximum iteration number reached: {}'.format(max_iter)) + raise Exception('Maximum iteration number({}) exceeded.'.format(max_iter)) def extract_cover(xc, lc, n, xc_prev=None, lc_prev=None): # Extract the community structure yc = xc.dot(lc) dc_inv = dc_inverse(yc, n) soft_comm = dc_inv.dot(yc) comm_net = np.linalg.multi_dot([lc, xc.T, soft_comm]) if xc_prev is None: evol_net = None else: evol_net = np.linalg.multi_dot([lc_prev, xc_prev.T, soft_comm]) return soft_comm, dc_inv, comm_net, evol_net -def read_edge_list(filename, weighted=False, permute=False): +def read_edge_list(filename, weighted=False, permute=False, mask=[]): # Input: Entire path to edge-list file if permute: - print("Reading and permuting edge file {}".format(filename)) + logging.info("Reading and permuting edge file {}".format(filename)) else: - print("Reading edge file {}".format(filename)) + logging.info("Reading edge file {}".format(filename)) idmap = set() edge_cache = {} with open(filename) as f: for line in f: if weighted: u, v, w = [int(x) for x in line.strip().split()] else: tmp = [int(x) for x in line.strip().split()] u, v, w = tmp[0], tmp[1], 1.0 - edge_cache[(u, v)] = w - idmap.add(u) - idmap.add(v) + if u not in mask and v not in mask: + edge_cache[(u, v)] = w + idmap.add(u) + idmap.add(v) idmap = list(idmap) idmap_inv = {nid: i for i, nid in enumerate(idmap)} n = len(idmap) adj_mat = np.zeros((n, n)) for (u, v), w in edge_cache.items(): adj_mat[idmap_inv[u], idmap_inv[v]] = w adj_mat += adj_mat.T if permute: tmp_a = adj_mat[np.triu_indices(adj_mat.shape[0], k=1)].flatten() np.random.shuffle(tmp_a) tmp_m = np.zeros(adj_mat.shape) tmp_m[np.triu_indices(tmp_m.shape[0], 1)] = tmp_a adj_mat = tmp_m + tmp_m.T wc = adj_mat / adj_mat.sum() return idmap, idmap_inv, wc def facetnet_step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permutations=0): """ Applies one step of the facetNet algorithm :param edgelist_path: Path to current edgelist :param alpha: Alpha cost weight :param weighted: True if W is weighted, False if it is binary :param m: Number of (a priori) communities, if m = -1, the algorithm detects the best number of communities :param show_plot: Bool, show convergence plot of gamma :param xc_prev: X(t-1) matrix :param lc_prev: LAMBDA(t-1) matrix :param idmap0: previous id map :param idmap_inv0: inverse previous idmap :param permutations: Number of permutations to be applied on the edgelist (for significance investigation of Qs) :return: idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff """ + mask = [158, 99] - idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff = step( - edgelist_path, alpha, m, weighted, show_plot, xc_prev, lc_prev, idmap0, idmap_inv0, permute=False) + idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, wc = step( + edgelist_path, alpha, m, weighted, False, xc_prev, lc_prev, idmap0, idmap_inv0, permute=False, ind_mask=mask) + + if show_plot: + fig, ax = plt.subplots(figsize=(12, 12)) + plot_clustermap(ax, wc, xc, lc, idmap) + plt.show() qc_dist = np.empty(permutations) - for p in range(permutations): - _, _, _, _, qc_s, _, _, _, _ = step( - edgelist_path, alpha, m, weighted, show_plot, xc_prev, lc_prev, idmap0, idmap_inv0, permute=True) - qc_dist[p] = qc_s if permutations > 0: - print(qc_dist) + for p in tqdm(range(permutations)): + _, _, _, _, qc_s, _, _, _, _, _ = step( + edgelist_path, alpha, m, weighted, False, xc_prev, lc_prev, idmap0, idmap_inv0, permute=True, ind_mask=mask) + qc_dist[p] = qc_s fig, ax = plt.subplots() ax.hist(qc_dist, bins=int(np.ceil(np.sqrt(len(qc_dist)))), color='black') ax.axvline(x=qc_s_res, color='r', linestyle='dashed', linewidth=2) ax.set_xlabel('$Q_s: \in [0, 1]$') ax.set_title( 'dashed: $w_c$, black: pemuted $triang(w_c)$, n={}'.format(permutations)) plt.show() - return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff + return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, qc_dist -def step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permute=False): +def step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permute=False, ind_mask=[]): # Params see idmap, idmap_inv, wc = read_edge_list( - edgelist_path, weighted, permute=permute) + edgelist_path, weighted, permute=permute, mask=ind_mask) n = len(idmap) max_comm = 6 - print("Calculate network representation for {communities} communities".format( + logging.info("Calculate network representation for {communities} communities".format( communities="variable number of" if m < 0 else "{}".format(m))) if xc_prev is None: # time step 0 if m > 0: yc = initialize_cover(n, m) xc, lc = update_cover(yc, wc, 1.0, is_plot=show_plot) else: qc_s_res = [] xc_res = [] lc_res = [] zc = initialize_cover(n, n) for i in range(2, max_comm): xc_ret, lc_ret = update_cover( zc, wc, 1.0, mvarmax=i, is_plot=show_plot) xc_res.append(xc_ret) lc_res.append(lc_ret) dc_inv = dc_inverse(xc_ret.dot(lc_ret), n) qc_s_res.append(soft_modularity(wc, dc_inv, xc_ret, lc_ret)) idx_max = np.argmax(qc_s_res) xc = xc_res[idx_max] lc = lc_res[idx_max] soft_comm, dc_inv, comm_net, evol_net = extract_cover(xc, lc, n) else: xc = compensate_node_demography(xc_prev, idmap0, idmap, idmap_inv0) xc_prev = xc # need demography compensated version of xc_prev to calculate evol_net if m > 0: yc = xc.dot(lc_prev) xc, lc = update_cover(yc, wc, alpha, is_plot=show_plot) else: qc_s_res = [] xc_res = [] lc_res = [] zc = np.linalg.multi_dot([xc, lc_prev, xc.T]) for i in range(2, max_comm): xc_ret, lc_ret = update_cover( zc, wc, alpha, mvarmax=i, is_plot=show_plot) xc_res.append(xc_ret) lc_res.append(lc_ret) dc_inv = dc_inverse(xc_ret.dot(lc_ret), n) qc_s_res.append(soft_modularity(wc, dc_inv, xc_ret, lc_ret)) idx_max = np.argmax(qc_s_res) xc = xc_res[idx_max] lc = lc_res[idx_max] - soft_comm, dc_inv, comom_net, evol_net = extract_cover( + soft_comm, dc_inv, comm_net, evol_net = extract_cover( xc, lc, n, xc_prev, lc_prev) if m > 0: qc_s_res = soft_modularity(wc, dc_inv, xc, lc) else: qc_s = max(qc_s_res) if show_plot: no_iter = np.arange(2, max_comm, 1) plt.plot(no_iter, qc_s_res) plt.ylabel("Qs") plt.xlabel("# communities") plt.title(r"$Q_s,max = $ {} for {} communities".format( qc_s, idx_max + 2)) plt.show() # plt.savefig("{}_qc.png".format(edgelist_path)) # qc_s_alt = soft_modularity_alt(soft_comm, wc) # DEBUG if m < 0: m_eff = idx_max + 2 else: m_eff = m - return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff + return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, wc + + +def plot_clustermap(ax, wc, xc, lc, idmap): + cg = sns.clustermap(wc, figsize=(12, 12), cmap='hot') + perm = cg.data2d.index.values + l_ticks = [idmap[i] for i in perm] + cg.ax_heatmap.set_xticklabels(l_ticks) + cg.ax_heatmap.set_yticklabels(l_ticks) + temp = np.linalg.multi_dot([xc, lc, xc.T]) + temp = temp[perm, :] + temp = temp[:, perm] + ax.matshow(temp, cmap='hot') + ax.set_xticks(range(len(l_ticks))) + ax.set_xticklabels(l_ticks) + ax.set_yticks(range(len(l_ticks))) + ax.set_yticklabels(l_ticks) diff --git a/facetnet_evol.py b/facetnet_evol.py index e6a7242..39d0160 100644 --- a/facetnet_evol.py +++ b/facetnet_evol.py @@ -1,86 +1,88 @@ """ Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019 Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python) Copyright 2019 __UNIL__. All rights reserved. This file is part of facetnet-python-unil. facetnet-python-unil is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. facetnet-python-unil is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with facetnet-python-unil. If not, see . """ import argparse import sys import facetnet as fn import numpy as np import pandas as pd if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("t_steps", help="number of networks", type=int) - parser.add_argument("alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float) + parser.add_argument( + "alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float) parser.add_argument("m_cluster", help="number of clusters", type=int) - parser.add_argument("nw_folder", help="folder holding the network data in edgelist.X form", type=str) - parser.add_argument("--show", help="show convergence plot of gamma", type=str) + parser.add_argument( + "nw_folder", help="folder holding the network data in edgelist.X form", type=str) + parser.add_argument("res_folder", help="path to results folder, where" + "xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv" + "will be saved", type=str) + parser.add_argument( + "--show", help="show convergence plot of gamma", type=bool, default=False) args = parser.parse_args() if args.alpha <= 0.0 or args.alpha > 1.0: print("Alpha value must be in (0.0, 1.0], terminate script") sys.exit(1) print("Analyzing networks from: {}".format(args.nw_folder)) - print("# time-steps: {}, alpha: {}, # communities: {}".format(args.t_steps, args.alpha, args.m_cluster)) - - if args.show is not None: - is_show = True - else: - is_show = False + print("# time-steps: {}, alpha: {}, # communities: {}".format(args.t_steps, + args.alpha, args.m_cluster)) for t in range(args.t_steps): print("time-step:", t) edge_list = "{}/{}.edgelist".format(args.nw_folder, t) if t == 0: - idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step( - edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=is_show) + idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, _ = fn.facetnet_step( + edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=args.show) else: - idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step( - edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=is_show, + idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, _ = fn.facetnet_step( + edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=args.show, xc_prev=xc, lc_prev=lc, idmap0=idmap, idmap_inv0=idmap_inv) print("Soft modularity Qc={}".format(qc_s)) # Save results for analysis clusters = [] for i in range(m_eff): clusters.append("cluster_{}".format(i)) - if args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None: - evol_net_df = pd.DataFrame(data=evol_net, columns=clusters) - evol_net_df.to_csv("{}/evol_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), - index=False) + evol_net_df = pd.DataFrame(data=evol_net, columns=clusters) + evol_net_df.to_csv("{}/evol_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t), + index=False) soft_comm_df = pd.DataFrame(data=soft_comm, columns=clusters) soft_comm_df.insert(0, column="id", value=idmap) - soft_comm_df.to_csv("{}/soft_comm_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), + soft_comm_df.to_csv("{}/soft_comm_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t), index=False) comm_net_df = pd.DataFrame(data=comm_net, columns=clusters) - comm_net_df.to_csv("{}/comm_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), + comm_net_df.to_csv("{}/comm_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t), index=False) lc_df = pd.DataFrame(data=lc, columns=clusters) - lc_df.to_csv("{}/lc_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) + lc_df.to_csv("{}/lc_alpha{}_nw{}.csv".format(args.res_folder, + args.alpha, t), index=False) print("Results written to: {}".format(args.nw_folder)) diff --git a/facetnet_exp_4_1_1.py b/facetnet_exp_4_1_1.py index af5ddac..fd75782 100644 --- a/facetnet_exp_4_1_1.py +++ b/facetnet_exp_4_1_1.py @@ -1,75 +1,75 @@ """ Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019 Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python) Copyright 2019 __UNIL__. All rights reserved. This file is part of facetnet-python-unil. facetnet-python-unil is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. facetnet-python-unil is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with facetnet-python-unil. If not, see . """ import argparse import numpy as np import facetnet as fn import networkx as nx import community as community_louvain import matplotlib.pyplot as plt if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--edgelist", help="full path to edge-list file", type=str, default='paper_experiments/synthetic_dataset_1/0.edgelist') parser.add_argument("--res_folder", help="path to results folder, where" "xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv" "will be saved", type=str, default=None) parser.add_argument( "--show", help="show convergence plot of gamma", type=str) args = parser.parse_args() print("Analyzing network: {}".format(args.edgelist)) if args.show is not None: is_show = True else: is_show = False max_comm = 7 Q = [] Qs = [] for m in range(2, max_comm): print("# clusters: {}".format(m)) - idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, _ = fn.facetnet_step( + idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, _, _ = fn.facetnet_step( args.edgelist, 1.0, m, show_plot=is_show, weighted=False) Qs.append(qc_s) graph = nx.read_edgelist(args.edgelist, nodetype=int) if args.res_folder is not None: np.savetxt("{}/soft_comm_m{}.csv".format(args.res_folder, m), soft_comm, delimiter=",") np.savetxt("{}/comm_net_m{}.csv".format(args.res_folder, m), comm_net, delimiter=",") hard_comm = np.argmax(soft_comm, axis=1) cluster_labels = [str(i) for i in range(0, hard_comm.shape[1])] partition = dict(list(enumerate(hard_comm.flat))) Q.append(community_louvain.modularity(partition, graph)) plt.plot(np.arange(2, max_comm, 1), Q, 'ro-') plt.plot(np.arange(2, max_comm, 1), Qs, 'bs-') plt.legend(["Q", "Qs"]) plt.xticks(np.arange(2, max_comm, 1)) plt.ylim([0.0, 0.6]) plt.xlabel("Community number") plt.show() if args.res_folder is not None: print("Results written to: {}".format(args.res_folder)) diff --git a/facetnet_step.py b/facetnet_step.py index cd990ed..ac0da0a 100644 --- a/facetnet_step.py +++ b/facetnet_step.py @@ -1,124 +1,134 @@ """ Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019 Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python) Copyright 2019 __UNIL__. All rights reserved. This file is part of facetnet-python-unil. facetnet-python-unil is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. facetnet-python-unil is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with facetnet-python-unil. If not, see . """ import argparse import sys +import logging import time import numpy as np import facetnet as fn import pandas as pd if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "edgelist", help="full path to edge-list file", type=str) parser.add_argument( "alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float) parser.add_argument("m_cluster", help="number of clusters", type=int) parser.add_argument("res_folder", help="path to results folder, where" "xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv" "will be saved", type=str) parser.add_argument( "t_step", help="network time step, to save result", type=int) parser.add_argument("--xcap", help="full path to xcap.dat file", type=str) parser.add_argument("--lcap", help="full path to lcap.dat file", type=str) parser.add_argument( "--idmap", help="full path to idmap.dat file", type=str) parser.add_argument( "--idmap_inv", help="full path to idmap_inv.dat file", type=str) parser.add_argument( "-permutations", help="number of permutations for significance check of Qs", type=int, default=0) parser.add_argument( "--show", help="show convergence plot of gamma", type=str) args = parser.parse_args() + logging.basicConfig(filename="{}/alpha{}_nw{}_{}.log".format(args.res_folder, args.alpha, args.t_step, + time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime())), level=logging.INFO) + if args.alpha <= 0.0 or args.alpha > 1.0: - print("Alpha value must be in (0.0, 1.0], terminate script") + logging.error("Alpha value must be in (0.0, 1.0], terminate script") sys.exit(1) - print("Analyzing network: {}".format(args.edgelist)) - print("# time-step: {}, alpha: {}, # communities: {}".format(args.t_step, - args.alpha, args.m_cluster)) + logging.info("Analyzing network: {}".format(args.edgelist)) + logging.info("# time-step: {}, alpha: {}, # communities: {}".format( + args.t_step, args.alpha, args.m_cluster)) start = time.time() if args.xcap is None and args.lcap is None and args.idmap is None and args.idmap_inv is None: - idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step( + idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, qc_dist = fn.facetnet_step( args.edgelist, args.alpha, args.m_cluster, permutations=args.permutations, show_plot=args.show) elif args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None: xc = np.load(args.xcap, allow_pickle=True) lc = np.load(args.lcap, allow_pickle=True) idmap = np.ndarray.tolist(np.load(args.idmap, allow_pickle=True)) idmap_inv = np.ndarray.tolist( np.load(args.idmap_inv, allow_pickle=True)) - idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step( + idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, qc_dist = fn.facetnet_step( args.edgelist, args.alpha, args.m_cluster, permutations=args.permutation, show_plot=args.show, xc_prev=xc, lc_prev=lc, idmap0=idmap, idmap_inv0=idmap_inv) else: - print("Some of (xcap, ycap, idmap, idmap_inv) are given, some not") + logging.error( + "Some of (xcap, ycap, idmap, idmap_inv) are given, some not") sys.exit(1) stop = time.time() - print("Soft modularity Qc = ({})".format(qc_s)) - print("Elapsed time: {} sec".format(stop - start)) + logging.info("Soft modularity Qc = ({})".format(qc_s)) + logging.info("Elapsed time: {} sec".format(stop - start)) # Save end state of current step for input to next step xc.dump("{}/xcap_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step)) lc.dump("{}/lcap_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step)) np.asarray(idmap).dump( "{}/idmap_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step)) np.asarray(idmap_inv).dump( "{}/idmap_inv_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step)) # Save results for analysis clusters = [] for i in range(m_eff): clusters.append("cluster_{}".format(i)) if args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None: evol_net_df = pd.DataFrame(data=evol_net, columns=clusters) evol_net_df.to_csv("{}/evol_net_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) soft_comm_df = pd.DataFrame(data=soft_comm, columns=clusters) soft_comm_df.insert(0, column="id", value=idmap) soft_comm_df.to_csv("{}/soft_comm_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) comm_net_df = pd.DataFrame(data=comm_net, columns=clusters) comm_net_df.to_csv("{}/comm_net_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) lc_df = pd.DataFrame(data=lc, columns=clusters) lc_df.to_csv("{}/lcap_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) if args.m_cluster < 0: qc_s_df = pd.DataFrame(data=qc_s, columns=["Q_s"]) qc_s_df.to_csv("{}/q_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False) - print("Results and state written to: {}".format(args.res_folder)) + if args.permutations > 0: + qc_dist_df = pd.DataFrame(data=qc_dist, columns=["Q_s"]) + qc_dist_df.to_csv("{}/q_permute_alpha{}_nw{}.csv".format( + args.res_folder, args.alpha, args.t_step), index=False) + + logging.info("Results and state written to: {}".format(args.res_folder))