diff --git a/configuration.py b/configuration.py index eae1871..5749a77 100644 --- a/configuration.py +++ b/configuration.py @@ -1,31 +1,31 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Configuration file. """ import torch # settings for the implementation SEED = 7 # used device (CPU vs GPU) DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # settings for the dataset DATASET = 'mnist_012' TRAIN_SIZE = 500 VALID_SIZE = 100 TEST_SIZE = 100 # properties of the model BATCH_SIZE = 100 LEARNING_RATE = 1e-4 USE_PRETRAINED_MODEL = False # settings of the run TRAIN = True -PATIENCE = 100 +PATIENCE = 20 # debugging GENERATE_SAVE = False \ No newline at end of file diff --git a/datasets.py b/datasets.py index f1ac475..f09da5d 100644 --- a/datasets.py +++ b/datasets.py @@ -1,126 +1,111 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Datasets module. """ import numpy as np import logging from torch.utils.data import DataLoader from torchvision import datasets, transforms from loader import MNIST_bis from custom_transform import RandomTranslation, Substract from utils import train_valid_split, train_valid_test_split from configuration import * +from utils import get_dim, count_class_freq logger = logging.getLogger(__name__) torch.manual_seed(SEED) # datasets mean and standard deviation used for normalization # L = R * 299/1000 + G * 587/1000 + B * 114/1000 MNIST_MEAN = [0.458] MNIST_STD = [0.225] ETH80_MEAN = [0.426] ETH80_STD = [0.166] -def count_class_freq(loader, num_classes): - """Return the frequency for each class from the loader.""" - - t = np.zeros(num_classes) - for _, target in loader: - for c in target: - t[c] +=1 - return t - -def get_dim(loader): - """Get the dimension of the input image.""" - - dim = iter(loader).next()[0].size()[2] - - return dim - def load_dataset(dataset, train_size, valid_size, test_size): """Load the dataset passed in argument with the corresponding sizes for the training, validation and testing set.""" if dataset == 'mnist_012': root = './data/mnist' num_classes = 3 trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, train=True, transform=trans) test_set = datasets.MNIST(root=root, train=False, transform=trans) train_valid_set = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'mnist_rot': root = './data/mnist' num_classes = 9 train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.RandomRotation((0,360)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, 
train=True, transform=train_trans) test_set = datasets.MNIST(root=root, train=False, transform=test_trans) train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'mnist_trans': root = './data/mnist' num_classes = 9 train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), RandomTranslation(horizontal=6, vertical=6), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, train=True, transform=train_trans) test_set = datasets.MNIST(root=root, train=False, transform=test_trans) train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'eth80': root = './data/eth80' num_classes = 8 trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((50,50)), transforms.ToTensor(), transforms.Normalize(mean=ETH80_MEAN, std=ETH80_STD)]) complete_set = datasets.ImageFolder(root=root, transform=trans) class_names = complete_set.classes train_sampler, valid_sampler, test_sampler = train_valid_test_split(dataset=complete_set, train_size=train_size, valid_size=valid_size) train_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=test_sampler, num_workers=4, pin_memory=True, drop_last=True) else: raise ValueError('Specified dataset does not exist.') logger.debug('Class frequency train loader: {} validation loader: {} test loader: {}'.format( count_class_freq(train_loader, num_classes),count_class_freq(valid_loader, num_classes), count_class_freq(test_loader, num_classes)) ) logging.info('Loaded {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'.format(dataset, len(train_loader)*BATCH_SIZE, len(valid_loader)*BATCH_SIZE, 
len(test_loader)*BATCH_SIZE)) return train_loader, valid_loader, test_loader, get_dim(train_loader) diff --git a/evaluate.py b/evaluate.py index 93d6159..b659579 100644 --- a/evaluate.py +++ b/evaluate.py @@ -1,93 +1,93 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Test module. """ import numpy as np import logging from tqdm import tqdm import sys from saved_datasets import load_saved_dataset from graph import compute_laplacians from utils import load_pretrained_model, snapshot from paths import SAVED_MODELS_DIR from configuration import * from models import TIGraNet_mnist_012, TIGraNet_mnist_rot, TIGraNet_mnist_trans, TIGraNet_eth80 logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def load_model(dataset_name, dim, laplacian_matrix, shifted_laplacian_matrix): """Load the model associated with the dataset.""" if dataset_name == 'mnist_012': model = TIGraNet_mnist_012( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'mnist_rot': model = TIGraNet_mnist_rot( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'mnist_trans': model = TIGraNet_mnist_trans( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'eth80': model = TIGraNet_eth80( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) return model # get arguments from command line if len(sys.argv) != 2: print('Usage: python3 evaluate.py [DATASET]') sys.exit(1) else: dataset_name = sys.argv[-1] if dataset_name not in ['mnist_012', 'mnist_rot', 'mnist_trans', 'eth80']: print('DATASET available: mnist_012, mnist_rot, mnist_trans or eth80') sys.exit(1) # prepare data and model _, _, test_loader, dim, laplacian_matrix, shifted_laplacian_matrix = load_saved_dataset(name=dataset_name) model = load_model(dataset_name=dataset_name, dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix) # pass it to GPU if available model.to(DEVICE) # evaluate on testing set logging.info('Testing...') acc_valid = 0 test_samples_size = len(test_loader) * BATCH_SIZE for data, target in tqdm(test_loader): data = data.to(DEVICE) y_pred = model.predict(data) - acc_valid += torch.eq(y_pred.data.cpu(),target).sum().item() + acc_valid += torch.eq(y_pred.cpu(),target).sum().item() error_test = 100 - 100 * acc_valid / test_samples_size print('test error: {:.2f} %'.format(error_test)) \ No newline at end of file diff --git a/models.py b/models.py index debfa73..7579f8c 100644 --- a/models.py +++ b/models.py @@ -1,492 +1,492 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Neural Networks models module. 
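A note on the spectral layers below: each SpectralConv filters graph signals with
degree-K polynomials of the Laplacian (the `degree_of_polynomial` argument), which
plot.py visualizes as Chebyshev polynomials. As a rough sketch only -- the actual
implementation lives in layers.SpectralConv, which is not part of this diff -- the
polynomial basis can be built with the Chebyshev recurrence
T_k(L) = 2 L T_{k-1}(L) - T_{k-2}(L):

    import numpy as np

    def chebyshev_basis(laplacian, signal, degree):
        # stack T_0(L) x, ..., T_degree(L) x along the last axis;
        # assumes the Laplacian is already rescaled so its spectrum lies in [-1, 1]
        terms = [signal, laplacian @ signal]
        for _ in range(2, degree + 1):
            terms.append(2 * laplacian @ terms[-1] - terms[-2])
        return np.stack(terms[:degree + 1], axis=-1)  # shape: N x (degree + 1)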
""" import numpy as np import logging import os import torch import torch.nn as nn import torch.optim as optim from torch.autograd import Variable from layers import SpectralConv, DynamicPool, Statistic from utils import init_mask from configuration import * from paths import SAVED_DATA, DEBUG_DIR_MNIST_012, DEBUG_DIR_MNIST_rot, DEBUG_DIR_ETH80 logger = logging.getLogger(__name__) class TIGraNet(nn.Module): - def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True): + def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False): super(TIGraNet, self).__init__() self.num_nodes = dim**2 self.laplacian_matrix = laplacian_matrix self.shifted_laplacian_matrix = shifted_laplacian_matrix self.batch_size = batch_size self.learning_rate = learning_rate self.mask = init_mask(num_nodes=self.num_nodes, batch_size=self.batch_size) self.load_pretrained_weights = load_pretrained_weights self.freeze_sc_weights = freeze_sc_weights self.loss_function = torch.nn.CrossEntropyLoss() def init_pretrained_weights(self, name): """Initialize the weights of the model with pretrained weights.""" self.spectral_conv1.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_0.npy')))) self.spectral_conv1.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_0.npy'))).unsqueeze(0)) self.spectral_conv2.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_1.npy')))) self.spectral_conv2.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_1.npy'))).unsqueeze(0)) self.fully_connected[0].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_1.npy'))).t()) self.fully_connected[0].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_1.npy')))) self.fully_connected[2].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_2.npy'))).t()) self.fully_connected[2].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_2.npy')))) self.fully_connected[4].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_3.npy'))).t()) self.fully_connected[4].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_3.npy')))) self.fully_connected[6].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_last.npy'))).t()) self.fully_connected[6].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_last.npy')))) if name=='mnist_012': self.spectral_conv3.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_2.npy')))) self.spectral_conv3.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_2.npy'))).unsqueeze(0)) def prepare_input(self, input): input = input.view(self.batch_size, 1, self.num_nodes) input = input - torch.mean(input, 2, True) input = input.transpose(1,2) return input def step(self, input, target, train): if train: self.train() else: self.eval() self.optimizer.zero_grad() out = self.forward(input) loss = self.loss_function(out, target) if train: 
            loss.backward()
            self.optimizer.step()

        return loss.item()

    def predict(self, input):
        self.eval()
        output = self.forward(input)
        _, output = torch.max(output, 1)

        return output


class TIGraNet_mnist_012(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=3, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=3, num_active_nodes=200, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=3, filter_size_out=6, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=6, num_active_nodes=100, mask=self.mask)
        self.spectral_conv3 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=6, filter_size_out=9, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool3 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=9, num_active_nodes=50, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=9, degree_of_polynomial=9, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=180, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=80),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=80, out_features=60),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=60, out_features=3)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='mnist_012')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')
        else:
            self.init_weights_default()
            logger.info('Loaded weights using uniform distribution in [0,1].')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def init_weights_default(self):
        """Initialize the weights of the model with uniform distribution in [0,1]."""

        for m in self.modules():
            if isinstance(m, SpectralConv):
                nn.init.uniform_(m.alpha.weight)
nn.init.uniform_(m.beta.weight) def forward(self, input): prepared_input = self.prepare_input(input) filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask) mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask) filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1) mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1) filter_operator3, y3, spectral_conv3 = self.spectral_conv3(spectral_conv2, mask2) mask3, dynamic_pool3 = self.dynamic_pool3(spectral_conv3, mask2) statistic = self.statistic(spectral_conv3, mask3) output = self.fully_connected(statistic) if GENERATE_SAVE: # save all intermediary steps for debugging variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, filter_operator3, y3, spectral_conv3, mask1, mask2, mask3, statistic, output] variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'filter_operator3', 'y3', 'spectral_conv3', 'mask1', 'mask2', 'mask3', 'statistic', 'output'] tuples = zip(variables, variables_names) for v, n in tuples: # np.save(DEBUG_DIR_MNIST_012 + 'constant_weights/' + n + '_p', v.detach().numpy()) np.save(DEBUG_DIR_MNIST_012 + 'pretrained_weights/' + n + '_p_pw', v.detach().numpy()) return output class TIGraNet_mnist_rot(TIGraNet): - def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True): + def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False): TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights) # Main layers self.spectral_conv1 = SpectralConv( batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask ) self.dynamic_pool1 = DynamicPool( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask ) self.spectral_conv2 = SpectralConv( batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask ) self.dynamic_pool2 = DynamicPool( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask ) self.statistic = Statistic( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=13, shifted_laplacian_matrix=self.shifted_laplacian_matrix ) self.fully_connected = nn.Sequential( nn.Linear(in_features=560, out_features=500), nn.ReLU(inplace=True), nn.Linear(in_features=500, out_features=300), nn.ReLU(inplace=True), nn.Linear(in_features=300, out_features= 100), nn.ReLU(inplace=True), nn.Linear(in_features=100, out_features=9) ) if load_pretrained_weights: self.init_pretrained_weights(name='mnist_rot') # random checks assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_rot', 'parameters', 'alpha_1.npy'))))).all() assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_rot', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all() assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 
'mnist_rot', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        if GENERATE_SAVE:
            # save all intermediary steps for debugging
            variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, mask1, mask2, statistic, output]
            variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'mask1', 'mask2', 'statistic', 'output']
            tuples = zip(variables, variables_names)
            for v, n in tuples:
                np.save(DEBUG_DIR_MNIST_rot + n + '_p_pw', v.detach().numpy())

        return output


class TIGraNet_mnist_trans(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=7, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=7, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=11, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=480, out_features=500),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=500, out_features=300),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=300, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=9)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='mnist_trans')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters',
'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        return output


class TIGraNet_eth80(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=5, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=5, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=11, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=480, out_features=500),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=500, out_features=300),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=300, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=8)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='eth80')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
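                    # requires_grad=False removes these parameters from the update;
                    # the Adam constructor below filters on p.requires_grad accordingly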
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        if GENERATE_SAVE:
            # save all intermediary steps for debugging
            variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, mask1, mask2, statistic, output]
            variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'mask1', 'mask2', 'statistic', 'output']
            tuples = zip(variables, variables_names)
            for v, n in tuples:
                np.save(DEBUG_DIR_ETH80 + n + '_p_pw', v.detach().numpy())

        return output
\ No newline at end of file
diff --git a/numerical_instability_example.py b/numerical_instability_example.py
index 3f88a85..9a5ed24 100644
--- a/numerical_instability_example.py
+++ b/numerical_instability_example.py
@@ -1,37 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Illustrates numerical instability for similar functions in Theano and PyTorch.
"""

import torch
import numpy as np
import theano
import theano.tensor as T
import os

from paths import *

torch.manual_seed(7)

def func_pytorch(a, b):
    a = a.permute(0,2,1).contiguous().view(-1,400)
    b = b.view(400,-1)
    return torch.matmul(a, b).view(100,5,400,10).permute(0,2,3,1)

def func_theano(a, b):
    return T.tensordot(a, b, [[2], [2]])

+def func_numpy(a,b):
+    return np.tensordot(a,b, [[2],[2]])
+
a = torch.randn(100,400,5)
b = torch.randn(400,400,10)
a_p = a.permute(0,2,1)
b_p = b.permute(2,1,0)

-out = func_pytorch(a, b)
+out_pytorch = func_pytorch(a, b)
+out_numpy = np.transpose(func_numpy(a_p,b_p), (0,3,2,1))
out_true = np.transpose(func_theano(a_p, b_p).eval(), (0,3,2,1))

-np.testing.assert_allclose(actual=out, desired=out_true, rtol=1e-6)
+
+
+# from debug import plot_pytorch_theano_image, plot_pytorch_theano_image_diff
+# from path import *
+
+# print(out.shape, out_true[0,:,0,0].shape)
+
+# plot_pytorch_theano_image(
+#     images=[out[8,:,2,3].numpy(), out_true[8,:,2,3]],
+#     dir=DEBUG_DIR_MNIST_012 + 'constant_weights/',
+#     name='temp'
+# )
+
+# plot_pytorch_theano_image_diff(
+#     images=[out[8,:,2,3].numpy(), out_true[8,:,2,3]],
+#     dir=DEBUG_DIR_MNIST_012 + 'constant_weights/',
+#     name='temp_diff'
+# )
+
+np.testing.assert_allclose(actual=out_numpy, desired=out_true, rtol=1e-7)   # OK
+np.testing.assert_allclose(actual=out_pytorch, desired=out_true, rtol=1e-7) # 76% mismatch
diff --git a/plot.py b/plot.py
index f139ac5..f1a6638 100644
--- a/plot.py
+++ b/plot.py
@@ -1,171 +1,113 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Plot functions to create figures.
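A typical call site for the `colorbar` helper defined below, with made-up data
(the helper keeps the bar the same height as the image):

    import numpy as np
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    im = ax.imshow(np.random.rand(26, 26), cmap='jet')
    colorbar(im)
    plt.show()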
""" import matplotlib.pyplot as plt from mpl_toolkits.axes_grid1 import make_axes_locatable import numpy as np import torch.nn as nn from paths import FIGURES_DIR, DEBUG_DIR_MNIST_012 def plot_loss_and_acc(run_time, run_name, history): """Generate a plot with training loss and validation accuracy for a specific model.""" num_epochs = list(range(len(history))) loss = [t[0] for t in history] acc = [t[1] for t in history] fig, ax1 = plt.subplots() ax2 = ax1.twinx() r1 = ax1.plot(num_epochs, loss, color='red', label='training loss') ax1.set_xlabel('epoch') ax1.set_ylabel('loss') r2 = ax2.plot(num_epochs, acc, color='blue', label='validation accuracy') ax2.set_ylabel('accuracy') lns = r2 + r1 labs = [l.get_label() for l in lns] leg = plt.legend(lns, labs, loc='center right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_loss_and_acc_results.png') plt.gcf().clear() def plot_loss(run_time, run_name, history): """Generate a plot with training and validation loss for the given history.""" num_epochs = list(range(len(history))) train_loss = [t[0] for t in history] valid_loss = [t[1] for t in history] fig, ax1 = plt.subplots() train_curve = ax1.plot(num_epochs, train_loss, color='red', label='training') valid_curve = ax1.plot(num_epochs, valid_loss, color='blue', label='validation') ax1.set_xlabel('epoch') ax1.set_ylabel('loss') leg = plt.legend(loc='upper right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_losses.png') plt.gcf().clear() def plot_error(run_time, run_name, history): """Generates a plot with training validation errors for the given history.""" num_epochs = list(range(len(history))) train_errors = [t[0] for t in history] valid_errors = [t[1] for t in history] fig, ax1 = plt.subplots() train_curve = ax1.plot(num_epochs, train_errors, color='red', label='training') valid_curve = ax1.plot(num_epochs, valid_errors, color='blue', label='validation') ax1.set_xlabel('epoch') ax1.set_ylabel('error %') leg = plt.legend(loc='upper right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_errors.png') plt.gcf().clear() def colorbar(mappable): """Create a colorbar that matches properly the size of the image.""" ax = mappable.axes fig = ax.figure divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) return fig.colorbar(mappable, cax=cax) -def show_filter(tensor, filter_name, num_filters, num_nodes, without_batch=False): - """Show the filters of the intermediary layers given by the tensor.""" +def show_tensor(image, filter_name, dim, num_nodes): + """Show the image given by the tensor.""" - dim = int(np.sqrt(num_nodes)) - - if not without_batch: - # select the first batch - tensor = tensor.data[0] - else: - tensor = tensor.data - - # keep only 3 filters in order to keep good quality in visualization - # if num_filters > 3: - # num_filters=3 - - # create the figure containing all the filters - fig = plt.figure(figsize=(15,5)) - for idx in range(num_filters): - ax = fig.add_subplot(1, num_filters, idx+1) # this line adds sub-axes - im = ax.imshow(tensor[:,idx].contiguous().view(dim,dim), cmap='jet') - #ax.set_axis_off() - #cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7]) - #colorbar(im) + fig = plt.figure(figsize=(5,5)) + ax = fig.add_subplot(1, 1, 1) + im = ax.imshow(image.view(dim,dim), cmap='jet') # add suptitle and display the plot plt.suptitle(filter_name) plt.tight_layout(h_pad=1) plt.show() -def show_filters_single_window(list, h, 
w, num_nodes, with_relu=False): - """Show the filters of the intermediary layers given by the tensor.""" - - dim = int(np.sqrt(num_nodes)) - - relu = nn.ReLU(inplace=True) - - # create the figure containing all the filters - fig = plt.figure(figsize=(15,10)) - for i in range(len(list)): - for j in range(3): - ax = fig.add_subplot(h, w, i*3 + j+1) # this line adds sub-axes - if with_relu: - im = ax.imshow(relu(list[i]).detach()[0, :, j].contiguous().view(dim,dim), cmap='jet') - else: - im = ax.imshow(list[i].detach()[0, :, j].contiguous().view(dim,dim), cmap='jet') - ax.set_axis_off() - colorbar(im) - - # add suptitle and display the plot - #plt.suptitle(filter_name) - plt.tight_layout(h_pad=1) - plt.show() - -def show_cheb_poly_single_window(cheb_poly, num_filters, num_nodes): +def show_cheb_poly_tensor(cheb_poly, filter_name, num_filters, dim, num_nodes): """Show chebyshev polynomial of the intermediary layers given by the tensor.""" - dim = int(np.sqrt(num_nodes)) - - #cheb_poly shape : B x F x N x D - # create the figure containing all the filters fig = plt.figure(figsize=(15,10)) for i in range(num_filters): for j in range(10): ax = fig.add_subplot(num_filters, 10, i*10 + j+1) # this line adds sub-axes im = ax.imshow(cheb_poly.detach()[1, i, :, j].contiguous().view(dim,dim), cmap='jet') - #ax.set_axis_off() - #colorbar(im) # add suptitle and display the plot - #plt.suptitle(filter_name) + plt.suptitle(filter_name) plt.tight_layout(h_pad=1) - plt.show() - -def plot_input(input, dim, index, num): - """Plot the input images.""" - - fig = plt.figure(figsize=(15,10)) - for i in range(num): - for j in range(num): - ax = fig.add_subplot(num, num, i*num + j+1) - im = ax.imshow(input.data[index + (i*num)+j].contiguous().view(dim,dim), cmap='jet') - plt.show() \ No newline at end of file diff --git a/saved_datasets.py b/saved_datasets.py index 63f95e8..463b58b 100644 --- a/saved_datasets.py +++ b/saved_datasets.py @@ -1,56 +1,62 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Saved datasets module. 
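Expected usage of `load_saved_dataset` below, assuming the exported .npy files
for the chosen dataset exist under SAVED_DATA (shapes are an assumption based on
how the loaders are consumed elsewhere in the repo):

    train_loader, valid_loader, test_loader, dim, lap, shifted_lap = load_saved_dataset(name='mnist_012')
    signals, labels = next(iter(train_loader))  # signals: BATCH_SIZE x dim*dim
    assert lap.shape == (dim**2, dim**2)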
""" import numpy as np import logging import os import torch from torch.utils.data import TensorDataset, DataLoader from paths import SAVED_DATA from configuration import * from graph import shift_laplacian +from utils import get_dim, count_class_freq logger = logging.getLogger(__name__) -def get_dim(data): - """Get the dimension of the input image.""" - dim = len(data[0]) - return dim - def load_saved_dataset(name, data_path=SAVED_DATA): """Load the saved data.""" train_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_train_signals.npy'))).float() valid_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_val_signals.npy'))).float() test_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_test_signals.npy'))).float() train_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_train_labels.npy'))).long() valid_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_val_labels.npy'))).long() test_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_test_labels.npy'))).long() train_dataset = TensorDataset(train_data, train_labels) valid_dataset = TensorDataset(valid_data, valid_labels) test_dataset = TensorDataset(test_data, test_labels) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True) valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True) test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=True) dim = int(np.sqrt(get_dim(train_data))) laplacian_matrix = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_laplacian.npy'))).float() shifted_laplacian_matrix = shift_laplacian(laplacian_matrix, dim).to(DEVICE) - logger.info('Loaded dataset from the saved {} dataset.'.format(name)) + if name == 'mnist_012': + num_classes = 3 + elif name == 'eth80': + num_classes = 8 + else: + num_classes = 9 + + logger.info('Class frequency \ntrain loader: {} \nvalidation loader: {} \ntest loader: {}'.format( + count_class_freq(train_loader, num_classes),count_class_freq(valid_loader, num_classes), count_class_freq(test_loader, num_classes)) + ) + logging.info('Loaded saved {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'.format(name, len(train_loader)*BATCH_SIZE, len(valid_loader)*BATCH_SIZE, len(test_loader)*BATCH_SIZE)) return train_loader, valid_loader, test_loader, dim, laplacian_matrix, shifted_laplacian_matrix diff --git a/train.py b/train.py index 1ee0cf1..c8a1f87 100644 --- a/train.py +++ b/train.py @@ -1,150 +1,151 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Main module. 
""" import numpy as np import datetime import logging from tqdm import tqdm import time import sys from torch.autograd import Variable from datasets import load_dataset from saved_datasets import load_saved_dataset from graph import compute_laplacians from utils import snapshot, load_pretrained_model from plot import plot_loss, plot_error from paths import SAVED_MODELS_DIR from configuration import * from models import * logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def load_model(dataset_name, dim, laplacian_matrix, shifted_laplacian_matrix): """Load the model associated with the dataset.""" if dataset_name == 'mnist_012': model = TIGraNet_mnist_012( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'mnist_rot': model = TIGraNet_mnist_rot( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'mnist_trans': model = TIGraNet_mnist_trans( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'eth80': model = TIGraNet_eth80( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) return model # get arguments from command line if len(sys.argv) != 2: print('Usage: python3 train.py [DATASET]') sys.exit(1) else: dataset_name = sys.argv[-1] if dataset_name not in ['mnist_012', 'mnist_rot', 'mnist_trans', 'eth80']: print('DATASET available: mnist_012, mnist_rot, mnist_trans or eth80') sys.exit(1) # prepare data and model train_loader, valid_loader, _, dim, laplacian_matrix, shifted_laplacian_matrix = load_saved_dataset(name=dataset_name) model = load_model(dataset_name=dataset_name, dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix) # pass it to GPU if available model.to(DEVICE) logging.info('Training...') RUN_TIME = '{:%Y-%m-%d_%H-%M}'.format(datetime.datetime.now()) RUN_NAME = '{}_{}_{}_{:.0e}'.format( type(model).__name__, type(model.optimizer).__name__, #'F' if model.freeze_sc_weights else 'NF', BATCH_SIZE, LEARNING_RATE ) epoch = 0 best_error = (0,100) loss_history = [] error_history = [] while True: # train the model loss_train = 0 acc_train = 0 for data, target in tqdm(train_loader, desc='Training', leave=False): data, target = data.to(DEVICE), target.to(DEVICE) loss = model.step(data, target, train=True) loss_train += loss y_pred = model.predict(data) acc_train += torch.eq(y_pred.cpu(),target.cpu()).sum().item() + # validate the model loss_valid = 0 acc_valid = 0 for data, target in tqdm(valid_loader, desc='Validation', leave=False): - data = data.to(DEVICE) + data, target = data.to(DEVICE), target.to(DEVICE) loss = model.step(data, target, train=False) loss_valid += loss y_pred = model.predict(data) acc_valid += torch.eq(y_pred.cpu(),target.cpu()).sum().item() # print some metrics train_samples_size = len(train_loader) * BATCH_SIZE valid_samples_size = len(valid_loader) * BATCH_SIZE loss_train_epoch = loss_train / train_samples_size loss_valid_epoch = loss_valid / valid_samples_size error_train_epoch = 100 - 100 * (acc_train / train_samples_size) error_valid_epoch = 100 - 100 * (acc_valid 
/ valid_samples_size)
    error_history.append((error_train_epoch, error_valid_epoch))
    loss_history.append((loss_train_epoch, loss_valid_epoch))
    print('Epoch: {} train loss: {:.5f} valid loss: {:.5f} train error: {:.2f} % valid error: {:.2f} %'.format(epoch, loss_train_epoch, loss_valid_epoch, error_train_epoch, error_valid_epoch))

    # check if model is better
    if error_valid_epoch < best_error[1]:
        best_error = (epoch, error_valid_epoch)
-        snapshot(SAVED_MODELS_DIR, RUN_TIME, RUN_NAME, True, epoch, error_valid_epoch, model.state_dict())
+        snapshot(SAVED_MODELS_DIR, RUN_TIME, RUN_NAME, True, epoch, error_valid_epoch, model.state_dict(), model.optimizer.state_dict())

    # check that the model is not getting worse over time
    if best_error[0] + PATIENCE < epoch:
        print('Overfitting. Stopped at epoch {}.'.format(epoch))
        break

    epoch += 1

plot_loss(RUN_TIME, RUN_NAME, loss_history)
plot_error(RUN_TIME, RUN_NAME, error_history)
diff --git a/utils.py b/utils.py
index 37cb988..3b069af 100644
--- a/utils.py
+++ b/utils.py
@@ -1,154 +1,156 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utility functions.
"""

import numpy as np
import matplotlib.pyplot as plt
import random
import glob
import logging
import torch
import torchvision
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

from configuration import *

random.seed(SEED)
logger = logging.getLogger(__name__)

def select(dataset, size, digits_to_keep, stratified_sampling=False):
    """Select randomly specific elements given by digits_to_keep."""

    len_dataset = len(dataset)
    indices = list(range(len_dataset))
    random_select_indices = []
    random.shuffle(indices)

    if stratified_sampling:
        num_classes = len(digits_to_keep)
        classes = [[] for _ in range(num_classes)]
        for i in indices:
            if dataset[i][1] in digits_to_keep:
                classe = dataset[i][1]
                classes[classe].append(i)

        for i in range(np.min([len(classes[0]), len(classes[1]), len(classes[2])])):
            for j in range(num_classes):
                if len(random_select_indices) < size:
                    random_select_indices.append(classes[j][i])
                else:
                    break
    else:
        for i in indices:
            if len(random_select_indices) < size and dataset[i][1] in digits_to_keep:
                random_select_indices.append(i)

    return random_select_indices

def train_valid_split(dataset, train_size):
    """Split the dataset into training and validation set."""

    len_dataset = len(dataset)
    indices = list(range(len_dataset))
    train_indices = indices[:train_size]
    valid_indices = indices[train_size:]

    return SubsetRandomSampler(train_indices), SubsetRandomSampler(valid_indices)

def train_valid_test_split(dataset, train_size, valid_size):
    """Split the dataset into training, validation and testing set."""

    indices = list(range(len(dataset)))
    random.shuffle(indices)
    train_indices, valid_indices, test_indices = indices[:train_size], indices[train_size:train_size+valid_size], indices[train_size+valid_size:]

    return SubsetRandomSampler(train_indices), SubsetRandomSampler(valid_indices), SubsetRandomSampler(test_indices)

def imshow_data_loader(data_loader, eth80_class_names=[]):
    """Show image provided by the data loader."""

    # get a batch of data
    inputs, classes = next(iter(data_loader))
    out = torchvision.utils.make_grid(tensor=inputs)

    # get the corresponding values
    if eth80_class_names:
        title = [eth80_class_names[x] for x in classes]
        mean = ETH80_MEAN
        std = ETH80_STD
    else:
        title = [x for x in classes]
        mean = MNIST_MEAN
        std = MNIST_STD

    # build the original image
    out = out.numpy().transpose((1, 2, 0))
    out = std * out + mean
    out = np.clip(out, 0, 1)

    # display it
    plt.imshow(out)
    plt.title(title)
    plt.show()

def show_spectrum(tensor, num_filters):
    """Show the spectrum of the spectral layer."""

    return NotImplemented

-def snapshot(saved_model_dir, run_time, run_name, is_best, epoch, err_epoch, state_dict):
-    """Save the model state."""
-
-    # Write the full name
-    if is_best:
-        complete_name = '{}{}_{}_{}_{:.2f}_best'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
-    else:
-        complete_name = '{}{}_{}_{}_{:.2f}'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
-
+def snapshot(saved_model_dir, run_time, run_name, is_best, epoch, err_epoch, model_state_dict, optim_state_dict):
+    """Save the model state."""
+
+    complete_name = '{}{}_{}_{}_{:.2f}'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
+
+    states = {
+        'model': model_state_dict,
+        'optimizer': optim_state_dict
+    }
+
    # Save the model
-    with open(complete_name + '.pt', 'wb') as f:
-        torch.save(state_dict, f)
+    with open(complete_name + '.pt', 'wb') as f:
+        torch.save(states, f)

def load_pretrained_model(saved_model_dir, run_name, model):
    """Load the specified model."""

-    model_state = glob.glob(saved_model_dir + run_name)[0]
+    states = glob.glob(saved_model_dir + run_name)[0]
    if torch.cuda.is_available():
-        model.load_state_dict(torch.load(model_state))
-        model.cuda()
+        checkpoint = torch.load(states)
    else:
-        state_dict = torch.load(model_state, map_location=lambda storage, loc: storage)
+        checkpoint = torch.load(states, map_location=lambda storage, loc: storage)

-    # in case we load state_dict with different architecture (subset)
-    from collections import OrderedDict
-    new_state_dict = OrderedDict()
-    params_name = []
-    for name, _ in model.named_parameters():
-        params_name.append(name)
-
-    for k, v in state_dict.items():
-        if k in params_name:
-            new_state_dict[k] = v
-
-    # load params
-    model.load_state_dict(new_state_dict)
-    #model.load_state_dict(torch.load(model_state, map_location=lambda storage, loc: storage))
+    model.load_state_dict(checkpoint['model'])
+    model.optimizer.load_state_dict(checkpoint['optimizer'])

    logging.info('Loaded {} model.'.format(run_name))

    return model

def init_mask(num_nodes, batch_size):
    """Initialize the nodes of interest by including all the nodes of the graph."""

    mask = Variable(torch.ones(batch_size, num_nodes, 1)).to(DEVICE)

-    return mask
\ No newline at end of file
+    return mask
+
+def count_class_freq(loader, num_classes):
+    """Return the frequency for each class from the loader."""
+
+    t = np.zeros(num_classes)
+    for _, target in loader:
+        for c in target:
+            t[c] += 1
+    return t
+
+def get_dim(data):
+    """Get the flattened dimension of one input sample."""
+    dim = len(data[0])
+    return dim
\ No newline at end of file
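For reference, a self-contained sketch of how the two relocated helpers at the
end of utils.py behave; the tensors here are made up:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    signals = torch.randn(10, 784)  # ten flattened 28x28 images
    labels = torch.randint(0, 3, (10,))
    loader = DataLoader(TensorDataset(signals, labels), batch_size=5)

    print(count_class_freq(loader, num_classes=3))  # e.g. [4. 3. 3.]
    print(get_dim(signals))                         # 784, i.e. dim**2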