Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F63039253
facetnet.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Fri, May 17, 08:55
Size
10 KB
Mime Type
text/x-python
Expires
Sun, May 19, 08:55 (2 d)
Engine
blob
Format
Raw Data
Handle
17724182
Attached To
R9074 FacetNet_python_unil
facetnet.py
View Options
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
pandas
as
pd
# from sklearn.metrics import mutual_info_score
def
soft_moduarity
(
wc
,
dc_inv
,
xc
,
lc
):
# Calculate the soft modularity as defined by equation (7)
temp
=
np
.
linalg
.
multi_dot
([
dc_inv
,
xc
,
lc
])
n
=
wc
.
shape
[
0
]
temp1
=
np
.
linalg
.
multi_dot
([
temp
.
T
,
wc
,
temp
])
ones
=
np
.
ones
((
1
,
n
))
onesT
=
ones
.
T
temp2
=
np
.
linalg
.
multi_dot
([
np
.
ones
((
1
,
n
)),
wc
.
T
,
temp
,
temp
.
T
,
wc
,
np
.
ones
((
n
,
1
))])
qc_s
=
np
.
trace
(
np
.
linalg
.
multi_dot
([
temp
.
T
,
wc
,
temp
]))
-
np
.
linalg
.
multi_dot
([
np
.
ones
((
1
,
n
)),
wc
.
T
,
temp
,
temp
.
T
,
wc
,
np
.
ones
((
n
,
1
))])
# Equivalent:
# qc_s = np.trace(temp.T.dot(wc).dot(temp)) - np.ones((1, n)).dot(wc.T).dot(temp).dot(temp.T).dot(wc).dot(np.ones((n, 1)))
return
qc_s
[
0
,
0
]
def
soft_modularity_alt
(
soft_comm
,
W
):
N
=
W
.
shape
[
0
]
ret
=
np
.
trace
(
soft_comm
.
T
*
W
*
soft_comm
)
one
=
np
.
matrix
(
np
.
ones
((
N
,
1
)))
ret
-=
np
.
array
(
one
.
T
*
W
.
T
*
soft_comm
*
soft_comm
.
T
*
W
*
one
)
.
squeeze
()
return
ret
def
similarity
(
adj_mat
,
weighted
,
gamma
=
0.2
):
# calculate a similarity matrix from the adjacency matrix, basically an exponential scaling
n
,
m
=
adj_mat
.
shape
sim
=
np
.
zeros
(
adj_mat
.
shape
)
if
weighted
:
for
i
in
range
(
n
):
for
j
in
range
(
m
):
if
adj_mat
[
i
,
j
]
!=
0
:
sim
[
i
,
j
]
=
np
.
exp
(
-
1.0
/
(
gamma
*
adj_mat
[
i
,
j
]))
else
:
sim
[
i
,
j
]
=
0.0
else
:
sim
=
adj_mat
/
2.0
for
i
in
range
(
n
):
# populate the diagonal
sim
[
i
,
i
]
=
1.0
sim
/=
np
.
sum
(
sim
)
return
sim
def
normalize_rows
(
w
:
np
.
ndarray
):
# normalize matrix w row-wise
row_norm
=
np
.
linalg
.
norm
(
w
,
ord
=
1
,
axis
=
1
)
for
i
in
range
(
w
.
shape
[
0
]):
if
row_norm
[
i
]
!=
0
:
w
[
i
,
:]
/=
row_norm
[
i
]
return
w
def
normalize_cols
(
xc
:
np
.
ndarray
):
# normalize matrix xc column-wise
col_norm
=
np
.
linalg
.
norm
(
xc
,
ord
=
1
,
axis
=
0
)
for
j
in
range
(
xc
.
shape
[
1
]):
if
col_norm
[
j
]
!=
0
:
xc
[:,
j
]
/=
col_norm
[
j
]
return
xc
def
kl_divergence
(
a_mat
,
b_mat
):
# Kullback-Leibler divergence for all non zero elements of a_mat and b_mat
assert
a_mat
.
shape
==
b_mat
.
shape
n
,
m
=
a_mat
.
shape
res
=
0.0
for
i
in
range
(
n
):
for
j
in
range
(
m
):
if
a_mat
[
i
,
j
]
>
1e-20
and
b_mat
[
i
,
j
]
>
1e-20
:
res
+=
a_mat
[
i
,
j
]
*
np
.
log
(
a_mat
[
i
,
j
]
/
b_mat
[
i
,
j
])
-
a_mat
[
i
,
j
]
+
b_mat
[
i
,
j
]
return
res
def
param_update
(
yc
,
wc
,
alpha
,
is_plot
=
False
):
# Update function for xc (capital X and capital L), implementation of equations (4) and (5)
n
,
m
=
yc
.
shape
# xc_old = np.random.rand(xc.shape[0], xc.shape[1])
xc_old
=
np
.
random
.
rand
(
n
,
m
)
xc_old
=
normalize_cols
(
xc_old
)
# lc_old = lc_new
lc_old
=
np
.
diag
(
np
.
random
.
rand
(
m
))
lc_old
/=
np
.
trace
(
lc_old
)
eps
=
1e-5
max_iter
=
int
(
1e3
)
gamma
=
np
.
zeros
(
max_iter
)
for
it
in
range
(
max_iter
):
wc_approx
=
xc_old
.
dot
(
lc_old
.
dot
(
xc_old
.
T
))
xc_new
=
np
.
zeros
(
xc_old
.
shape
)
lc_new
=
np
.
zeros
(
lc_old
.
shape
)
for
k
in
range
(
m
):
for
i
in
range
(
n
):
for
j
in
range
(
n
):
if
wc
[
i
,
j
]
!=
0
:
# avoid divisions by zero in wc_approx
xc_new
[
i
,
k
]
+=
wc
[
i
,
j
]
*
lc_old
[
k
,
k
]
*
xc_old
[
j
,
k
]
/
wc_approx
[
i
,
j
]
lc_new
[
k
,
k
]
+=
wc
[
i
,
j
]
*
xc_old
[
i
,
k
]
*
xc_old
[
j
,
k
]
/
wc_approx
[
i
,
j
]
xc_new
[
i
,
k
]
*=
(
2
*
alpha
*
xc_old
[
i
,
k
])
xc_new
[
i
,
k
]
+=
(
1
-
alpha
)
*
yc
[
i
,
k
]
lc_new
[
k
,
k
]
*=
(
alpha
*
lc_old
[
k
,
k
])
lc_new
[
k
,
k
]
+=
(
1
-
alpha
)
*
sum
(
yc
[:,
k
])
xc_new
=
normalize_cols
(
xc_new
)
lc_new
/=
np
.
trace
(
lc_new
)
gamma
[
it
]
=
alpha
*
kl_divergence
(
wc
,
np
.
linalg
.
multi_dot
([
xc_new
,
lc_new
,
xc_new
.
T
]))
+
(
1
-
alpha
)
*
kl_divergence
(
yc
,
xc_new
.
dot
(
lc_new
))
if
it
==
0
:
gamma_min
=
gamma
[
it
]
else
:
if
abs
((
gamma
[
it
]
-
gamma_min
))
/
gamma_min
<
eps
:
if
is_plot
:
plt
.
plot
(
gamma
[
0
:
it
])
plt
.
ylabel
(
r"$\gamma$"
)
plt
.
xlabel
(
"# iter"
)
plt
.
show
()
print
(
"====> no iter: {}"
.
format
(
it
))
return
xc_res
,
lc_res
if
gamma
[
it
]
<
gamma
[
it
-
1
]:
gamma_min
=
gamma
[
it
]
xc_res
=
xc_new
lc_res
=
lc_new
else
:
if
is_plot
:
plt
.
plot
(
gamma
[
0
:
it
])
plt
.
ylabel
(
r"$\gamma$"
)
plt
.
xlabel
(
"# iter"
)
plt
.
show
()
print
(
"====> no iter: {}"
.
format
(
it
))
return
xc_res
,
lc_res
xc_old
=
xc_new
lc_old
=
lc_new
raise
Exception
(
'Maximum iteration number reached: {}'
.
format
(
max_iter
))
def
read_edge_list
(
filename
,
weighted
=
False
):
print
(
"Reading edge file {}"
.
format
(
filename
))
idmap
=
set
()
edge_cache
=
{}
with
open
(
filename
)
as
f
:
for
line
in
f
:
if
weighted
:
u
,
v
,
w
=
[
int
(
x
)
for
x
in
line
.
strip
()
.
split
()]
else
:
tmp
=
[
int
(
x
)
for
x
in
line
.
strip
()
.
split
()]
u
,
v
,
w
=
tmp
[
0
],
tmp
[
1
],
1.0
edge_cache
[(
u
,
v
)]
=
w
idmap
.
add
(
u
)
idmap
.
add
(
v
)
idmap
=
list
(
idmap
)
idmap_inv
=
{
nid
:
i
for
i
,
nid
in
enumerate
(
idmap
)}
n
=
len
(
idmap
)
adj_mat
=
np
.
zeros
((
n
,
n
))
for
(
u
,
v
),
w
in
edge_cache
.
items
():
adj_mat
[
idmap_inv
[
u
],
idmap_inv
[
v
]]
=
w
adj_mat
+=
adj_mat
.
T
return
idmap
,
idmap_inv
,
adj_mat
def
facetnet_evolution
(
net_path
,
alpha
,
t_steps
,
m
,
show_plot
=
False
):
"""
Applies facetNet algorithm on an entire set of consecutively named edgelist.X files. Can deal with appearing and
disappearing number of nodes. Establishes soft community memberships to an a priori given number of communities (m)..
:param net_path: Path to edge-lists
:param alpha: Alpha cost weight
:param t_steps: Total number of time-steps to be computed (must be at least # of edge-list files)
:param m: Number of (a priori) communities
:param show_plot: Bool, show convergence plot of gamma
:return: No return value, saves soft_comm as soft_comm_alpha0.X_nwX.csv files in net_path folder
"""
clusters
=
[]
for
i
in
range
(
m
):
clusters
.
append
(
"cluster_{}"
.
format
(
i
))
for
t
in
range
(
t_steps
):
print
(
"time-step:"
,
t
)
idmap
,
idmap_inv
,
adj_mat
=
read_edge_list
(
net_path
+
"/
%d
.edgelist"
%
t
,
weighted
=
True
)
n
=
len
(
idmap
)
# wc = normalize_rows(adj_mat)
wc
=
adj_mat
/
adj_mat
.
sum
()
print
(
"Calculate network representation ..."
)
if
t
==
0
:
xc
=
np
.
random
.
rand
(
n
,
m
)
xc
=
normalize_cols
(
xc
)
lc
=
np
.
diag
(
np
.
random
.
rand
(
m
))
lc
/=
np
.
trace
(
lc
)
yc
=
xc
.
dot
(
lc
)
xc
,
lc
,
=
param_update
(
yc
,
wc
,
1.0
,
show_plot
)
else
:
reserved_rows
=
[
idmap_inv0
[
x
]
for
x
in
idmap0
if
x
in
idmap
]
num_new
=
len
(
set
(
idmap
)
-
set
(
idmap0
))
num_old
=
len
(
reserved_rows
)
xc
=
xc
[
reserved_rows
,
:]
xc
=
normalize_cols
(
xc
)
xc
*=
num_old
/
(
num_old
+
num_new
)
if
num_new
!=
0
:
xc
=
np
.
pad
(
xc
,
((
0
,
num_new
),
(
0
,
0
)),
mode
=
'constant'
,
constant_values
=
(
1
/
num_new
,
1
/
num_new
))
yc
=
xc
.
dot
(
lc
)
xc
,
lc
,
=
param_update
(
yc
,
wc
,
alpha
,
show_plot
)
yc
=
xc
.
dot
(
lc
)
dc_inv
=
np
.
zeros
(
n
)
for
i
in
range
(
n
):
dc_inv
[
i
]
=
1
/
np
.
sum
(
yc
[
i
,
:])
dc_inv
=
np
.
matrix
(
np
.
diag
(
dc_inv
))
soft_comm
=
dc_inv
.
dot
(
yc
)
df
=
pd
.
DataFrame
(
data
=
soft_comm
,
columns
=
clusters
)
df
.
insert
(
0
,
column
=
"id"
,
value
=
idmap
)
df
.
to_csv
(
"{}/soft_comm_alpha{}_nw{}.csv"
.
format
(
net_path
,
alpha
,
t
),
index
=
False
)
idmap0
=
idmap
idmap_inv0
=
idmap_inv
def
facetnet_step
(
edgelist_path
,
alpha
,
m
,
weighted
=
True
,
show_plot
=
False
,
xc_prev
=
None
,
lc_prev
=
None
,
idmap0
=
None
,
idmap_inv0
=
None
):
"""
Applies one step of the facetNet algorithm, equivalent to a single step of facetnet_evolution
:param xc_prev: X(t-1) matrix
:param lc_prev: LAMBDA(t-1) matrix
:param idmap0: previous id map
:param idmap_inv0: inverse previous idmap
:param edgelist_path: Path to current edgelist
:param alpha: Alpha cost weight
:param weighted: True if W is weighted, False if it is binary
:param m: Number of (a priori) communities
:param show_plot: Bool, show convergence plot of gamma
:return: idmap, idmap_inv, xc, lc, soft_comm_df
"""
clusters
=
[]
for
i
in
range
(
m
):
clusters
.
append
(
"cluster_{}"
.
format
(
i
))
idmap
,
idmap_inv
,
adj_mat
=
read_edge_list
(
edgelist_path
,
weighted
)
n
=
len
(
idmap
)
# wc = normalize_rows(adj_mat)
wc
=
adj_mat
/
adj_mat
.
sum
()
print
(
"Calculate network representation ..."
)
if
xc_prev
is
None
:
print
(
"This is an initilaizing step (t = 0)"
)
xc
=
np
.
random
.
rand
(
n
,
m
)
xc
=
normalize_cols
(
xc
)
lc
=
np
.
diag
(
np
.
random
.
rand
(
m
))
lc
/=
np
.
trace
(
lc
)
yc
=
xc
.
dot
(
lc
)
xc
,
lc
,
=
param_update
(
yc
,
wc
,
1.0
,
show_plot
)
else
:
reserved_rows
=
[
idmap_inv0
[
x
]
for
x
in
idmap0
if
x
in
idmap
]
num_new
=
len
(
set
(
idmap
)
-
set
(
idmap0
))
num_old
=
len
(
reserved_rows
)
xc
=
xc_prev
[
reserved_rows
,
:]
xc
=
normalize_cols
(
xc
)
xc
*=
num_old
/
(
num_old
+
num_new
)
if
num_new
!=
0
:
xc
=
np
.
pad
(
xc
,
((
0
,
num_new
),
(
0
,
0
)),
mode
=
'constant'
,
constant_values
=
(
1
/
num_new
,
1
/
num_new
))
yc
=
xc
.
dot
(
lc_prev
)
xc
,
lc
,
=
param_update
(
yc
,
wc
,
alpha
,
show_plot
)
yc
=
xc
.
dot
(
lc
)
dc_inv
=
np
.
zeros
(
n
)
for
i
in
range
(
n
):
dc_inv
[
i
]
=
1
/
np
.
sum
(
yc
[
i
,
:])
dc_inv
=
np
.
matrix
(
np
.
diag
(
dc_inv
))
soft_comm
=
dc_inv
.
dot
(
yc
)
comm_net
=
np
.
linalg
.
multi_dot
([
lc
,
xc
.
T
,
soft_comm
])
if
xc_prev
is
None
:
evol_net
=
None
else
:
evol_net
=
np
.
linalg
.
multi_dot
([
lc_prev
,
xc_prev
.
T
,
soft_comm
])
qc_s
=
soft_moduarity
(
wc
,
dc_inv
,
xc
,
lc
)
qc_s_alt
=
soft_modularity_alt
(
soft_comm
,
wc
)
print
(
"Soft comm alt = {}"
.
format
(
qc_s_alt
))
soft_comm_df
=
pd
.
DataFrame
(
data
=
soft_comm
,
columns
=
clusters
)
soft_comm_df
.
insert
(
0
,
column
=
"id"
,
value
=
idmap
)
comm_net_df
=
pd
.
DataFrame
(
data
=
comm_net
,
columns
=
clusters
)
evol_net_df
=
pd
.
DataFrame
(
data
=
evol_net
,
columns
=
clusters
)
return
idmap
,
idmap_inv
,
xc
,
lc
,
qc_s
,
soft_comm_df
,
comm_net_df
,
evol_net_df
Event Timeline
Log In to Comment