diff --git a/facetnet.py b/facetnet.py
index c74b3b3..0e9d084 100644
--- a/facetnet.py
+++ b/facetnet.py
@@ -1,425 +1,440 @@
"""
Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019
Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python)
Copyright 2019 __UNIL__. All rights reserved.
This file is part of facetnet-python-unil.
facetnet-python-unil is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
facetnet-python-unil is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with facetnet-python-unil. If not, see .
"""
+import logging
+from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# from sklearn.metrics import mutual_info_score
def soft_modularity(wc, dc_inv, xc, lc):
# Calculate the soft modularity as defined by equation (7)
temp = np.linalg.multi_dot([dc_inv, xc, lc])
n = wc.shape[0]
qc_s = np.trace(np.linalg.multi_dot([temp.T, wc, temp])) - np.linalg.multi_dot(
[np.ones((1, n)), wc.T, temp, temp.T, wc, np.ones((n, 1))])
return qc_s[0, 0]
def soft_modularity_alt(soft_comm, W):
# from the original facetnet-python project
N = W.shape[0]
ret = np.trace(soft_comm.T*W*soft_comm)
one = np.matrix(np.ones((N, 1)))
ret -= np.array(one.T*W.T*soft_comm*soft_comm.T*W*one).squeeze()
return ret
def similarity(adj_mat, weighted, gamma=0.2):
# calculate a similarity matrix from the adjacency matrix, basically an exponential scaling (see paper sec 4.2)
n, m = adj_mat.shape
sim = np.zeros(adj_mat.shape)
if weighted:
for i in range(n):
for j in range(m):
if adj_mat[i, j] != 0:
sim[i, j] = np.exp(-1.0/(gamma * adj_mat[i, j]))
else:
sim[i, j] = 0.0
else:
sim = adj_mat / 2.0
for i in range(n):
sim[i, i] = 1.0
sim /= np.sum(sim)
return sim
def normalize_rows(w: np.ndarray):
# normalize matrix w row-wise
row_norm = np.linalg.norm(w, ord=1, axis=1)
for i in range(w.shape[0]):
if row_norm[i] != 0:
w[i, :] /= row_norm[i]
return w
def normalize_cols(xc: np.ndarray):
# normalize matrix xc column-wise
col_norm = np.linalg.norm(xc, ord=1, axis=0)
for j in range(xc.shape[1]):
if col_norm[j] != 0:
xc[:, j] /= col_norm[j]
return xc
def kl_divergence(a_mat, b_mat):
# Kullback-Leibler divergence for all non zero elements of a_mat and b_mat
assert a_mat.shape == b_mat.shape
n, m = a_mat.shape
res = 0.0
for i in range(n):
for j in range(m):
if a_mat[i, j] > 1e-20 and b_mat[i, j] > 1e-20:
res += a_mat[i, j] * np.log(a_mat[i, j] /
b_mat[i, j]) - a_mat[i, j] + b_mat[i, j]
return res
def dc_inverse(yc, n):
# dc[i, i] = sum_j{[xc * lc]_ij}, see paragraph 2.5.1
dc_inv = np.zeros(n)
for i in range(n):
dc_inv[i] = 1 / np.sum(yc[i, :])
dc_inv = np.matrix(np.diag(dc_inv))
return dc_inv
def initialize_cover(n, m):
# Initializing for matrices xc and lc at time step t = 0
- print("Initialization step (t = 0)")
+ logging.info("Initialization step (t = 0)")
xc = np.random.rand(n, m)
xc = normalize_cols(xc)
lc = np.diag(np.random.rand(m))
lc /= np.trace(lc)
yc = xc.dot(lc)
return yc
def compensate_node_demography(xc, idmap0, idmap, idmap_inv0):
# Account for demographic changes in the nodes list (see paper paragraph 3.1)
reserved_rows = [idmap_inv0[x] for x in idmap0 if x in idmap]
num_new = len(set(idmap) - set(idmap0))
num_old = len(reserved_rows)
xc = xc[reserved_rows, :]
xc = normalize_cols(xc)
xc *= num_old / (num_old + num_new)
if num_new != 0:
xc = np.pad(xc, ((0, num_new), (0, 0)),
mode='constant', constant_values=(0.0, 0.0))
return xc
def update_cover(yzc, wc, alpha, mvarmax=None, is_plot=False):
"""
Update function for xc (capital X and capital L) with static or variable community number.
Implementation of equations (4) and (5) ((9) and (10) respectively).
If alpha = 1.0 (i.e. ignoring yc when calculating the community representation), yc is only
used to determine (n, m) but its values are ignored (yc is to be filled with NaN).
:param yzc: xc(t-1) * lc(t-1) (xc(t-1) * lc(t-1) * xc(t-1).T respectively), see paragraph 2.2.2 (3.2.2)
:param wc: adjacency matrix, needs to be normalized with total matrix sum, see paragraph 4.2
:param alpha: snap-shot vs. temporal cost weight, see paragraph 2.2
:param mvarmax: None if m (community number) is fixed between timesteps, m_max otherwise
:param is_plot: if True, a convergence plot is shown
:return: xc_res, lc_res
"""
if mvarmax is None:
n, m = yzc.shape
else:
n = wc.shape[0]
m = mvarmax
xc_old = np.random.rand(n, m)
xc_old = normalize_cols(xc_old)
lc_old = np.diag(np.random.rand(m))
lc_old /= np.trace(lc_old)
eps = 1e-5 # see paragraph 4.1.2
max_iter = int(1e3)
gamma = np.zeros(max_iter)
if mvarmax is not None:
cs = np.zeros(max_iter)
ct = np.zeros(max_iter)
for it in range(max_iter):
wc_approx = np.linalg.multi_dot([xc_old, lc_old, xc_old.T])
xc_new = np.zeros(xc_old.shape)
lc_new = np.zeros(lc_old.shape)
for k in range(m):
if mvarmax is None:
for i in range(n):
for j in range(n):
if wc[i, j] != 0: # avoid divisions by zero in wc_approx
xc_new[i, k] += wc[i, j] * lc_old[k, k] * \
xc_old[j, k] / wc_approx[i, j]
lc_new[k, k] += wc[i, j] * xc_old[i, k] * \
xc_old[j, k] / wc_approx[i, j]
xc_new[i, k] *= (2 * alpha * xc_old[i, k])
xc_new[i, k] += (1 - alpha) * yzc[i, k]
lc_new[k, k] *= (alpha * lc_old[k, k])
lc_new[k, k] += (1 - alpha) * sum(yzc[:, k])
else:
for i in range(n):
for j in range(n):
xc_new[i, k] += (alpha * wc[i, j] + (1 - alpha) * yzc[i, j]) * \
lc_old[k, k] * xc_old[j, k] / wc_approx[i, j]
lc_new[k, k] += (alpha * wc[i, j] + (1 - alpha) * yzc[i, j]) * \
xc_old[i, k] * xc_old[j, k] / wc_approx[i, j]
xc_new[i, k] *= xc_old[i, k]
lc_new[k, k] *= lc_old[k, k]
xc_new = normalize_cols(xc_new)
lc_new /= np.trace(lc_new)
if mvarmax is None:
gamma[it] = alpha * kl_divergence(wc, np.linalg.multi_dot(
[xc_new, lc_new, xc_new.T])) + (1 - alpha) * kl_divergence(yzc, xc_new.dot(lc_new))
else:
temp = np.linalg.multi_dot([xc_new, lc_new, xc_new.T])
cs[it] = kl_divergence(wc, temp)
ct[it] = kl_divergence(yzc, temp)
gamma[it] = alpha * cs[it] + (1 - alpha) * ct[it]
if it == 0:
gamma_min = gamma[it]
else:
if abs((gamma[it] - gamma_min)) / gamma_min < eps:
if is_plot:
plt.plot(gamma[0:it])
plt.ylabel(r"$\gamma$")
plt.xlabel("# iter")
plt.title("# communities: {}".format(m))
- cg = sns.clustermap(wc, figsize=(12, 12), cmap='hot')
- perm = cg.data2d.index.values
- temp = np.linalg.multi_dot([xc_new, lc_new, xc_new.T])
- temp = temp[perm, :]
- temp = temp[:, perm]
- fig, ax = plt.subplots(figsize=(12, 12))
- ax.matshow(temp, cmap='hot')
- ax.set_xticklabels(perm)
- ax.set_yticklabels(perm)
plt.show()
- print("====> no iter: {}".format(it))
+ logging.info("converged after no iter: {}".format(it))
return xc_res, lc_res
if gamma[it] < gamma[it - 1]:
gamma_min = gamma[it]
xc_res = xc_new
lc_res = lc_new
elif it > 2: # cost function starts to diverge after reaching local minimum
if is_plot:
plt.plot(gamma[0:it])
plt.ylabel(r"$\gamma$")
plt.xlabel("# iter")
plt.title("# communities: {}".format(m))
plt.show()
- print("====> no iter: {}".format(it))
+ logging.info("converged after no iter: {}".format(it))
return xc_res, lc_res
xc_old = xc_new
lc_old = lc_new
- raise Exception('Maximum iteration number reached: {}'.format(max_iter))
+ raise Exception('Maximum iteration number({}) exceeded.'.format(max_iter))
def extract_cover(xc, lc, n, xc_prev=None, lc_prev=None):
# Extract the community structure
yc = xc.dot(lc)
dc_inv = dc_inverse(yc, n)
soft_comm = dc_inv.dot(yc)
comm_net = np.linalg.multi_dot([lc, xc.T, soft_comm])
if xc_prev is None:
evol_net = None
else:
evol_net = np.linalg.multi_dot([lc_prev, xc_prev.T, soft_comm])
return soft_comm, dc_inv, comm_net, evol_net
-def read_edge_list(filename, weighted=False, permute=False):
+def read_edge_list(filename, weighted=False, permute=False, mask=[]):
# Input: Entire path to edge-list file
if permute:
- print("Reading and permuting edge file {}".format(filename))
+ logging.info("Reading and permuting edge file {}".format(filename))
else:
- print("Reading edge file {}".format(filename))
+ logging.info("Reading edge file {}".format(filename))
idmap = set()
edge_cache = {}
with open(filename) as f:
for line in f:
if weighted:
u, v, w = [int(x) for x in line.strip().split()]
else:
tmp = [int(x) for x in line.strip().split()]
u, v, w = tmp[0], tmp[1], 1.0
- edge_cache[(u, v)] = w
- idmap.add(u)
- idmap.add(v)
+ if u not in mask and v not in mask:
+ edge_cache[(u, v)] = w
+ idmap.add(u)
+ idmap.add(v)
idmap = list(idmap)
idmap_inv = {nid: i for i, nid in enumerate(idmap)}
n = len(idmap)
adj_mat = np.zeros((n, n))
for (u, v), w in edge_cache.items():
adj_mat[idmap_inv[u], idmap_inv[v]] = w
adj_mat += adj_mat.T
if permute:
tmp_a = adj_mat[np.triu_indices(adj_mat.shape[0], k=1)].flatten()
np.random.shuffle(tmp_a)
tmp_m = np.zeros(adj_mat.shape)
tmp_m[np.triu_indices(tmp_m.shape[0], 1)] = tmp_a
adj_mat = tmp_m + tmp_m.T
wc = adj_mat / adj_mat.sum()
return idmap, idmap_inv, wc
def facetnet_step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permutations=0):
"""
Applies one step of the facetNet algorithm
:param edgelist_path: Path to current edgelist
:param alpha: Alpha cost weight
:param weighted: True if W is weighted, False if it is binary
:param m: Number of (a priori) communities, if m = -1, the algorithm detects the best number of communities
:param show_plot: Bool, show convergence plot of gamma
:param xc_prev: X(t-1) matrix
:param lc_prev: LAMBDA(t-1) matrix
:param idmap0: previous id map
:param idmap_inv0: inverse previous idmap
:param permutations: Number of permutations to be applied on the edgelist (for significance investigation of Qs)
:return: idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff
"""
+ mask = [158, 99]
- idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff = step(
- edgelist_path, alpha, m, weighted, show_plot, xc_prev, lc_prev, idmap0, idmap_inv0, permute=False)
+ idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, wc = step(
+ edgelist_path, alpha, m, weighted, False, xc_prev, lc_prev, idmap0, idmap_inv0, permute=False, ind_mask=mask)
+
+ if show_plot:
+ fig, ax = plt.subplots(figsize=(12, 12))
+ plot_clustermap(ax, wc, xc, lc, idmap)
+ plt.show()
qc_dist = np.empty(permutations)
- for p in range(permutations):
- _, _, _, _, qc_s, _, _, _, _ = step(
- edgelist_path, alpha, m, weighted, show_plot, xc_prev, lc_prev, idmap0, idmap_inv0, permute=True)
- qc_dist[p] = qc_s
if permutations > 0:
- print(qc_dist)
+ for p in tqdm(range(permutations)):
+ _, _, _, _, qc_s, _, _, _, _, _ = step(
+ edgelist_path, alpha, m, weighted, False, xc_prev, lc_prev, idmap0, idmap_inv0, permute=True, ind_mask=mask)
+ qc_dist[p] = qc_s
fig, ax = plt.subplots()
ax.hist(qc_dist, bins=int(np.ceil(np.sqrt(len(qc_dist)))), color='black')
ax.axvline(x=qc_s_res, color='r', linestyle='dashed', linewidth=2)
ax.set_xlabel('$Q_s: \in [0, 1]$')
ax.set_title(
'dashed: $w_c$, black: pemuted $triang(w_c)$, n={}'.format(permutations))
plt.show()
- return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff
+ return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, qc_dist
-def step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permute=False):
+def step(edgelist_path, alpha, m, weighted=True, show_plot=False, xc_prev=None, lc_prev=None, idmap0=None, idmap_inv0=None, permute=False, ind_mask=[]):
# Params see
idmap, idmap_inv, wc = read_edge_list(
- edgelist_path, weighted, permute=permute)
+ edgelist_path, weighted, permute=permute, mask=ind_mask)
n = len(idmap)
max_comm = 6
- print("Calculate network representation for {communities} communities".format(
+ logging.info("Calculate network representation for {communities} communities".format(
communities="variable number of" if m < 0 else "{}".format(m)))
if xc_prev is None: # time step 0
if m > 0:
yc = initialize_cover(n, m)
xc, lc = update_cover(yc, wc, 1.0, is_plot=show_plot)
else:
qc_s_res = []
xc_res = []
lc_res = []
zc = initialize_cover(n, n)
for i in range(2, max_comm):
xc_ret, lc_ret = update_cover(
zc, wc, 1.0, mvarmax=i, is_plot=show_plot)
xc_res.append(xc_ret)
lc_res.append(lc_ret)
dc_inv = dc_inverse(xc_ret.dot(lc_ret), n)
qc_s_res.append(soft_modularity(wc, dc_inv, xc_ret, lc_ret))
idx_max = np.argmax(qc_s_res)
xc = xc_res[idx_max]
lc = lc_res[idx_max]
soft_comm, dc_inv, comm_net, evol_net = extract_cover(xc, lc, n)
else:
xc = compensate_node_demography(xc_prev, idmap0, idmap, idmap_inv0)
xc_prev = xc # need demography compensated version of xc_prev to calculate evol_net
if m > 0:
yc = xc.dot(lc_prev)
xc, lc = update_cover(yc, wc, alpha, is_plot=show_plot)
else:
qc_s_res = []
xc_res = []
lc_res = []
zc = np.linalg.multi_dot([xc, lc_prev, xc.T])
for i in range(2, max_comm):
xc_ret, lc_ret = update_cover(
zc, wc, alpha, mvarmax=i, is_plot=show_plot)
xc_res.append(xc_ret)
lc_res.append(lc_ret)
dc_inv = dc_inverse(xc_ret.dot(lc_ret), n)
qc_s_res.append(soft_modularity(wc, dc_inv, xc_ret, lc_ret))
idx_max = np.argmax(qc_s_res)
xc = xc_res[idx_max]
lc = lc_res[idx_max]
- soft_comm, dc_inv, comom_net, evol_net = extract_cover(
+ soft_comm, dc_inv, comm_net, evol_net = extract_cover(
xc, lc, n, xc_prev, lc_prev)
if m > 0:
qc_s_res = soft_modularity(wc, dc_inv, xc, lc)
else:
qc_s = max(qc_s_res)
if show_plot:
no_iter = np.arange(2, max_comm, 1)
plt.plot(no_iter, qc_s_res)
plt.ylabel("Qs")
plt.xlabel("# communities")
plt.title(r"$Q_s,max = $ {} for {} communities".format(
qc_s, idx_max + 2))
plt.show()
# plt.savefig("{}_qc.png".format(edgelist_path))
# qc_s_alt = soft_modularity_alt(soft_comm, wc) # DEBUG
if m < 0:
m_eff = idx_max + 2
else:
m_eff = m
- return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff
+ return idmap, idmap_inv, xc, lc, qc_s_res, soft_comm, comm_net, evol_net, m_eff, wc
+
+
+def plot_clustermap(ax, wc, xc, lc, idmap):
+ cg = sns.clustermap(wc, figsize=(12, 12), cmap='hot')
+ perm = cg.data2d.index.values
+ l_ticks = [idmap[i] for i in perm]
+ cg.ax_heatmap.set_xticklabels(l_ticks)
+ cg.ax_heatmap.set_yticklabels(l_ticks)
+ temp = np.linalg.multi_dot([xc, lc, xc.T])
+ temp = temp[perm, :]
+ temp = temp[:, perm]
+ ax.matshow(temp, cmap='hot')
+ ax.set_xticks(range(len(l_ticks)))
+ ax.set_xticklabels(l_ticks)
+ ax.set_yticks(range(len(l_ticks)))
+ ax.set_yticklabels(l_ticks)
diff --git a/facetnet_evol.py b/facetnet_evol.py
index e6a7242..39d0160 100644
--- a/facetnet_evol.py
+++ b/facetnet_evol.py
@@ -1,86 +1,88 @@
"""
Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019
Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python)
Copyright 2019 __UNIL__. All rights reserved.
This file is part of facetnet-python-unil.
facetnet-python-unil is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
facetnet-python-unil is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with facetnet-python-unil. If not, see .
"""
import argparse
import sys
import facetnet as fn
import numpy as np
import pandas as pd
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("t_steps", help="number of networks", type=int)
- parser.add_argument("alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float)
+ parser.add_argument(
+ "alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float)
parser.add_argument("m_cluster", help="number of clusters", type=int)
- parser.add_argument("nw_folder", help="folder holding the network data in edgelist.X form", type=str)
- parser.add_argument("--show", help="show convergence plot of gamma", type=str)
+ parser.add_argument(
+ "nw_folder", help="folder holding the network data in edgelist.X form", type=str)
+ parser.add_argument("res_folder", help="path to results folder, where"
+ "xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv"
+ "will be saved", type=str)
+ parser.add_argument(
+ "--show", help="show convergence plot of gamma", type=bool, default=False)
args = parser.parse_args()
if args.alpha <= 0.0 or args.alpha > 1.0:
print("Alpha value must be in (0.0, 1.0], terminate script")
sys.exit(1)
print("Analyzing networks from: {}".format(args.nw_folder))
- print("# time-steps: {}, alpha: {}, # communities: {}".format(args.t_steps, args.alpha, args.m_cluster))
-
- if args.show is not None:
- is_show = True
- else:
- is_show = False
+ print("# time-steps: {}, alpha: {}, # communities: {}".format(args.t_steps,
+ args.alpha, args.m_cluster))
for t in range(args.t_steps):
print("time-step:", t)
edge_list = "{}/{}.edgelist".format(args.nw_folder, t)
if t == 0:
- idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step(
- edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=is_show)
+ idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, _ = fn.facetnet_step(
+ edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=args.show)
else:
- idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step(
- edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=is_show,
+ idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, _ = fn.facetnet_step(
+ edge_list, args.alpha, args.m_cluster, weighted=True, show_plot=args.show,
xc_prev=xc, lc_prev=lc, idmap0=idmap, idmap_inv0=idmap_inv)
print("Soft modularity Qc={}".format(qc_s))
# Save results for analysis
clusters = []
for i in range(m_eff):
clusters.append("cluster_{}".format(i))
- if args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None:
- evol_net_df = pd.DataFrame(data=evol_net, columns=clusters)
- evol_net_df.to_csv("{}/evol_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
- index=False)
+ evol_net_df = pd.DataFrame(data=evol_net, columns=clusters)
+ evol_net_df.to_csv("{}/evol_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t),
+ index=False)
soft_comm_df = pd.DataFrame(data=soft_comm, columns=clusters)
soft_comm_df.insert(0, column="id", value=idmap)
- soft_comm_df.to_csv("{}/soft_comm_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
+ soft_comm_df.to_csv("{}/soft_comm_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t),
index=False)
comm_net_df = pd.DataFrame(data=comm_net, columns=clusters)
- comm_net_df.to_csv("{}/comm_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
+ comm_net_df.to_csv("{}/comm_net_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, t),
index=False)
lc_df = pd.DataFrame(data=lc, columns=clusters)
- lc_df.to_csv("{}/lc_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step), index=False)
+ lc_df.to_csv("{}/lc_alpha{}_nw{}.csv".format(args.res_folder,
+ args.alpha, t), index=False)
print("Results written to: {}".format(args.nw_folder))
diff --git a/facetnet_exp_4_1_1.py b/facetnet_exp_4_1_1.py
index af5ddac..fd75782 100644
--- a/facetnet_exp_4_1_1.py
+++ b/facetnet_exp_4_1_1.py
@@ -1,75 +1,75 @@
"""
Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019
Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python)
Copyright 2019 __UNIL__. All rights reserved.
This file is part of facetnet-python-unil.
facetnet-python-unil is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
facetnet-python-unil is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with facetnet-python-unil. If not, see .
"""
import argparse
import numpy as np
import facetnet as fn
import networkx as nx
import community as community_louvain
import matplotlib.pyplot as plt
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--edgelist", help="full path to edge-list file", type=str, default='paper_experiments/synthetic_dataset_1/0.edgelist')
parser.add_argument("--res_folder", help="path to results folder, where"
"xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv"
"will be saved", type=str, default=None)
parser.add_argument(
"--show", help="show convergence plot of gamma", type=str)
args = parser.parse_args()
print("Analyzing network: {}".format(args.edgelist))
if args.show is not None:
is_show = True
else:
is_show = False
max_comm = 7
Q = []
Qs = []
for m in range(2, max_comm):
print("# clusters: {}".format(m))
- idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, _ = fn.facetnet_step(
+ idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, _, _ = fn.facetnet_step(
args.edgelist, 1.0, m, show_plot=is_show, weighted=False)
Qs.append(qc_s)
graph = nx.read_edgelist(args.edgelist, nodetype=int)
if args.res_folder is not None:
np.savetxt("{}/soft_comm_m{}.csv".format(args.res_folder, m),
soft_comm, delimiter=",")
np.savetxt("{}/comm_net_m{}.csv".format(args.res_folder, m),
comm_net, delimiter=",")
hard_comm = np.argmax(soft_comm, axis=1)
cluster_labels = [str(i) for i in range(0, hard_comm.shape[1])]
partition = dict(list(enumerate(hard_comm.flat)))
Q.append(community_louvain.modularity(partition, graph))
plt.plot(np.arange(2, max_comm, 1), Q, 'ro-')
plt.plot(np.arange(2, max_comm, 1), Qs, 'bs-')
plt.legend(["Q", "Qs"])
plt.xticks(np.arange(2, max_comm, 1))
plt.ylim([0.0, 0.6])
plt.xlabel("Community number")
plt.show()
if args.res_folder is not None:
print("Results written to: {}".format(args.res_folder))
diff --git a/facetnet_step.py b/facetnet_step.py
index cd990ed..ac0da0a 100644
--- a/facetnet_step.py
+++ b/facetnet_step.py
@@ -1,124 +1,134 @@
"""
Modified by Matthias Rüegg (matthias.ruegg@unil.ch) on August 13 2019
Based on a program created by created by github.com/blmoistawinde (https://github.com/blmoistawinde/facetnet-python)
Copyright 2019 __UNIL__. All rights reserved.
This file is part of facetnet-python-unil.
facetnet-python-unil is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
facetnet-python-unil is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with facetnet-python-unil. If not, see .
"""
import argparse
import sys
+import logging
import time
import numpy as np
import facetnet as fn
import pandas as pd
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"edgelist", help="full path to edge-list file", type=str)
parser.add_argument(
"alpha", help="alpha cost weight (0.0, 1.0] for gamma = CS*alpha + CT*(1-alpha)", type=float)
parser.add_argument("m_cluster", help="number of clusters", type=int)
parser.add_argument("res_folder", help="path to results folder, where"
"xcap.dat, lcap.dat, idmap.dat, idmap_inv.dat, soft_comm.csv"
"will be saved", type=str)
parser.add_argument(
"t_step", help="network time step, to save result", type=int)
parser.add_argument("--xcap", help="full path to xcap.dat file", type=str)
parser.add_argument("--lcap", help="full path to lcap.dat file", type=str)
parser.add_argument(
"--idmap", help="full path to idmap.dat file", type=str)
parser.add_argument(
"--idmap_inv", help="full path to idmap_inv.dat file", type=str)
parser.add_argument(
"-permutations", help="number of permutations for significance check of Qs", type=int, default=0)
parser.add_argument(
"--show", help="show convergence plot of gamma", type=str)
args = parser.parse_args()
+ logging.basicConfig(filename="{}/alpha{}_nw{}_{}.log".format(args.res_folder, args.alpha, args.t_step,
+ time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime())), level=logging.INFO)
+
if args.alpha <= 0.0 or args.alpha > 1.0:
- print("Alpha value must be in (0.0, 1.0], terminate script")
+ logging.error("Alpha value must be in (0.0, 1.0], terminate script")
sys.exit(1)
- print("Analyzing network: {}".format(args.edgelist))
- print("# time-step: {}, alpha: {}, # communities: {}".format(args.t_step,
- args.alpha, args.m_cluster))
+ logging.info("Analyzing network: {}".format(args.edgelist))
+ logging.info("# time-step: {}, alpha: {}, # communities: {}".format(
+ args.t_step, args.alpha, args.m_cluster))
start = time.time()
if args.xcap is None and args.lcap is None and args.idmap is None and args.idmap_inv is None:
- idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step(
+ idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, qc_dist = fn.facetnet_step(
args.edgelist, args.alpha, args.m_cluster, permutations=args.permutations, show_plot=args.show)
elif args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None:
xc = np.load(args.xcap, allow_pickle=True)
lc = np.load(args.lcap, allow_pickle=True)
idmap = np.ndarray.tolist(np.load(args.idmap, allow_pickle=True))
idmap_inv = np.ndarray.tolist(
np.load(args.idmap_inv, allow_pickle=True))
- idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff = fn.facetnet_step(
+ idmap, idmap_inv, xc, lc, qc_s, soft_comm, comm_net, evol_net, m_eff, qc_dist = fn.facetnet_step(
args.edgelist, args.alpha, args.m_cluster, permutations=args.permutation,
show_plot=args.show, xc_prev=xc, lc_prev=lc, idmap0=idmap, idmap_inv0=idmap_inv)
else:
- print("Some of (xcap, ycap, idmap, idmap_inv) are given, some not")
+ logging.error(
+ "Some of (xcap, ycap, idmap, idmap_inv) are given, some not")
sys.exit(1)
stop = time.time()
- print("Soft modularity Qc = ({})".format(qc_s))
- print("Elapsed time: {} sec".format(stop - start))
+ logging.info("Soft modularity Qc = ({})".format(qc_s))
+ logging.info("Elapsed time: {} sec".format(stop - start))
# Save end state of current step for input to next step
xc.dump("{}/xcap_alpha{}_nw{}.dat".format(args.res_folder,
args.alpha, args.t_step))
lc.dump("{}/lcap_alpha{}_nw{}.dat".format(args.res_folder,
args.alpha, args.t_step))
np.asarray(idmap).dump(
"{}/idmap_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step))
np.asarray(idmap_inv).dump(
"{}/idmap_inv_alpha{}_nw{}.dat".format(args.res_folder, args.alpha, args.t_step))
# Save results for analysis
clusters = []
for i in range(m_eff):
clusters.append("cluster_{}".format(i))
if args.xcap is not None and args.lcap is not None and args.idmap is not None and args.idmap_inv is not None:
evol_net_df = pd.DataFrame(data=evol_net, columns=clusters)
evol_net_df.to_csv("{}/evol_net_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
index=False)
soft_comm_df = pd.DataFrame(data=soft_comm, columns=clusters)
soft_comm_df.insert(0, column="id", value=idmap)
soft_comm_df.to_csv("{}/soft_comm_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
index=False)
comm_net_df = pd.DataFrame(data=comm_net, columns=clusters)
comm_net_df.to_csv("{}/comm_net_step_alpha{}_nw{}.csv".format(args.res_folder, args.alpha, args.t_step),
index=False)
lc_df = pd.DataFrame(data=lc, columns=clusters)
lc_df.to_csv("{}/lcap_alpha{}_nw{}.csv".format(args.res_folder,
args.alpha, args.t_step), index=False)
if args.m_cluster < 0:
qc_s_df = pd.DataFrame(data=qc_s, columns=["Q_s"])
qc_s_df.to_csv("{}/q_alpha{}_nw{}.csv".format(args.res_folder,
args.alpha, args.t_step), index=False)
- print("Results and state written to: {}".format(args.res_folder))
+ if args.permutations > 0:
+ qc_dist_df = pd.DataFrame(data=qc_dist, columns=["Q_s"])
+ qc_dist_df.to_csv("{}/q_permute_alpha{}_nw{}.csv".format(
+ args.res_folder, args.alpha, args.t_step), index=False)
+
+ logging.info("Results and state written to: {}".format(args.res_folder))