diff --git a/configuration.py b/configuration.py index eae1871..5749a77 100644 --- a/configuration.py +++ b/configuration.py @@ -1,31 +1,31 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Configuration file. """ import torch # settings for the implementation SEED = 7 # used device (CPU vs GPU) DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # settings for the dataset DATASET = 'mnist_012' TRAIN_SIZE = 500 VALID_SIZE = 100 TEST_SIZE = 100 # properties of the model BATCH_SIZE = 100 LEARNING_RATE = 1e-4 USE_PRETRAINED_MODEL = False # settings of the run TRAIN = True -PATIENCE = 100 +PATIENCE = 20 # debugging GENERATE_SAVE = False \ No newline at end of file diff --git a/datasets.py b/datasets.py index f1ac475..f09da5d 100644 --- a/datasets.py +++ b/datasets.py @@ -1,126 +1,111 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Datasets module. """ import numpy as np import logging from torch.utils.data import DataLoader from torchvision import datasets, transforms from loader import MNIST_bis from custom_transform import RandomTranslation, Substract from utils import train_valid_split, train_valid_test_split from configuration import * +from utils import get_dim, count_class_freq logger = logging.getLogger(__name__) torch.manual_seed(SEED) # datasets mean and standard deviation used for normalization # L = R * 299/1000 + G * 587/1000 + B * 114/1000 MNIST_MEAN = [0.458] MNIST_STD = [0.225] ETH80_MEAN = [0.426] ETH80_STD = [0.166] -def count_class_freq(loader, num_classes): - """Return the frequency for each class from the loader.""" - - t = np.zeros(num_classes) - for _, target in loader: - for c in target: - t[c] +=1 - return t - -def get_dim(loader): - """Get the dimension of the input image.""" - - dim = iter(loader).next()[0].size()[2] - - return dim - def load_dataset(dataset, train_size, valid_size, test_size): """Load the dataset passed in argument with the corresponding sizes for the training, validation and testing set.""" if dataset == 'mnist_012': root = './data/mnist' num_classes = 3 trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, train=True, transform=trans) test_set = datasets.MNIST(root=root, train=False, transform=trans) train_valid_set = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'mnist_rot': root = './data/mnist' num_classes = 9 train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.RandomRotation((0,360)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, 
train=True, transform=train_trans) test_set = datasets.MNIST(root=root, train=False, transform=test_trans) train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'mnist_trans': root = './data/mnist' num_classes = 9 train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((26,26)), RandomTranslation(horizontal=6, vertical=6), transforms.ToTensor(), transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)]) train_valid_set = datasets.MNIST(root=root, train=True, transform=train_trans) test_set = datasets.MNIST(root=root, train=False, transform=test_trans) train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8]) train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size) train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True) elif dataset == 'eth80': root = './data/eth80' num_classes = 8 trans = transforms.Compose([transforms.Grayscale(num_output_channels=1), transforms.Resize((50,50)), transforms.ToTensor(), transforms.Normalize(mean=ETH80_MEAN, std=ETH80_STD)]) complete_set = datasets.ImageFolder(root=root, transform=trans) class_names = complete_set.classes train_sampler, valid_sampler, test_sampler = train_valid_test_split(dataset=complete_set, train_size=train_size, valid_size=valid_size) train_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) valid_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) test_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=test_sampler, num_workers=4, pin_memory=True, drop_last=True) else: raise ValueError('Specified dataset does not exist.') logger.debug('Class frequency train loader: {} validation loader: {} test loader: {}'.format( count_class_freq(train_loader, num_classes),count_class_freq(valid_loader, num_classes), count_class_freq(test_loader, num_classes)) ) logging.info('Loaded {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'.format(dataset, len(train_loader)*BATCH_SIZE, len(valid_loader)*BATCH_SIZE, 
len(test_loader)*BATCH_SIZE)) return train_loader, valid_loader, test_loader, get_dim(train_loader) diff --git a/evaluate.py b/evaluate.py index 93d6159..b659579 100644 --- a/evaluate.py +++ b/evaluate.py @@ -1,93 +1,93 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Test module. """ import numpy as np import logging from tqdm import tqdm import sys from saved_datasets import load_saved_dataset from graph import compute_laplacians from utils import load_pretrained_model, snapshot from paths import SAVED_MODELS_DIR from configuration import * from models import TIGraNet_mnist_012, TIGraNet_mnist_rot, TIGraNet_mnist_trans, TIGraNet_eth80 logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def load_model(dataset_name, dim, laplacian_matrix, shifted_laplacian_matrix): """Load the model associated with the dataset.""" if dataset_name == 'mnist_012': model = TIGraNet_mnist_012( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'mnist_rot': model = TIGraNet_mnist_rot( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'mnist_trans': model = TIGraNet_mnist_trans( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) elif dataset_name == 'eth80': model = TIGraNet_eth80( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, load_pretrained_weights=True ) return model # get arguments from command line if len(sys.argv) != 2: print('Usage: python3 evaluate.py [DATASET]') sys.exit(1) else: dataset_name = sys.argv[-1] if dataset_name not in ['mnist_012', 'mnist_rot', 'mnist_trans', 'eth80']: print('DATASET available: mnist_012, mnist_rot, mnist_trans or eth80') sys.exit(1) # prepare data and model _, _, test_loader, dim, laplacian_matrix, shifted_laplacian_matrix = load_saved_dataset(name=dataset_name) model = load_model(dataset_name=dataset_name, dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix) # pass it to GPU if available model.to(DEVICE) # evaluate on testing set logging.info('Testing...') acc_valid = 0 test_samples_size = len(test_loader) * BATCH_SIZE for data, target in tqdm(test_loader): data = data.to(DEVICE) y_pred = model.predict(data) - acc_valid += torch.eq(y_pred.data.cpu(),target).sum().item() + acc_valid += torch.eq(y_pred.cpu(),target).sum().item() error_test = 100 - 100 * acc_valid / test_samples_size print('test error: {:.2f} %'.format(error_test)) \ No newline at end of file diff --git a/models.py b/models.py index debfa73..7579f8c 100644 --- a/models.py +++ b/models.py @@ -1,492 +1,492 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Neural Networks models module. 
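A note on the spectral layers below: each SpectralConv filters graph signals with
degree-K polynomials of the Laplacian (the `degree_of_polynomial` argument), which
plot.py visualizes as Chebyshev polynomials. As a rough sketch only -- the actual
implementation lives in layers.SpectralConv, which is not part of this diff -- the
polynomial basis can be built with the Chebyshev recurrence
T_k(L) = 2 L T_{k-1}(L) - T_{k-2}(L):

    import numpy as np

    def chebyshev_basis(laplacian, signal, degree):
        # stack T_0(L) x, ..., T_degree(L) x along the last axis;
        # assumes the Laplacian is already rescaled so its spectrum lies in [-1, 1]
        terms = [signal, laplacian @ signal]
        for _ in range(2, degree + 1):
            terms.append(2 * laplacian @ terms[-1] - terms[-2])
        return np.stack(terms[:degree + 1], axis=-1)  # shape: N x (degree + 1)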
""" import numpy as np import logging import os import torch import torch.nn as nn import torch.optim as optim from torch.autograd import Variable from layers import SpectralConv, DynamicPool, Statistic from utils import init_mask from configuration import * from paths import SAVED_DATA, DEBUG_DIR_MNIST_012, DEBUG_DIR_MNIST_rot, DEBUG_DIR_ETH80 logger = logging.getLogger(__name__) class TIGraNet(nn.Module): - def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True): + def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False): super(TIGraNet, self).__init__() self.num_nodes = dim**2 self.laplacian_matrix = laplacian_matrix self.shifted_laplacian_matrix = shifted_laplacian_matrix self.batch_size = batch_size self.learning_rate = learning_rate self.mask = init_mask(num_nodes=self.num_nodes, batch_size=self.batch_size) self.load_pretrained_weights = load_pretrained_weights self.freeze_sc_weights = freeze_sc_weights self.loss_function = torch.nn.CrossEntropyLoss() def init_pretrained_weights(self, name): """Initialize the weights of the model with pretrained weights.""" self.spectral_conv1.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_0.npy')))) self.spectral_conv1.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_0.npy'))).unsqueeze(0)) self.spectral_conv2.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_1.npy')))) self.spectral_conv2.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_1.npy'))).unsqueeze(0)) self.fully_connected[0].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_1.npy'))).t()) self.fully_connected[0].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_1.npy')))) self.fully_connected[2].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_2.npy'))).t()) self.fully_connected[2].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_2.npy')))) self.fully_connected[4].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_3.npy'))).t()) self.fully_connected[4].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_3.npy')))) self.fully_connected[6].weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'W_last.npy'))).t()) self.fully_connected[6].bias = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'b_last.npy')))) if name=='mnist_012': self.spectral_conv3.alpha.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'alpha_2.npy')))) self.spectral_conv3.beta.weight = nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, name, 'parameters', 'beta_2.npy'))).unsqueeze(0)) def prepare_input(self, input): input = input.view(self.batch_size, 1, self.num_nodes) input = input - torch.mean(input, 2, True) input = input.transpose(1,2) return input def step(self, input, target, train): if train: self.train() else: self.eval() self.optimizer.zero_grad() out = self.forward(input) loss = self.loss_function(out, target) if train: 
            loss.backward()
            self.optimizer.step()

        return loss.item()

    def predict(self, input):
        self.eval()
        output = self.forward(input)
        _, output = torch.max(output, 1)

        return output


class TIGraNet_mnist_012(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=3, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=3, num_active_nodes=200, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=3, filter_size_out=6, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=6, num_active_nodes=100, mask=self.mask)
        self.spectral_conv3 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=6, filter_size_out=9, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool3 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=9, num_active_nodes=50, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=9, degree_of_polynomial=9, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=180, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=80),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=80, out_features=60),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=60, out_features=3)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='mnist_012')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_012', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')
        else:
            self.init_weights_default()
            logger.info('Loaded weights using uniform distribution in [0,1].')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def init_weights_default(self):
        """Initialize the weights of the model with uniform distribution in [0,1]."""

        for m in self.modules():
            if isinstance(m, SpectralConv):
                nn.init.uniform_(m.alpha.weight)
nn.init.uniform_(m.beta.weight) def forward(self, input): prepared_input = self.prepare_input(input) filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask) mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask) filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1) mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1) filter_operator3, y3, spectral_conv3 = self.spectral_conv3(spectral_conv2, mask2) mask3, dynamic_pool3 = self.dynamic_pool3(spectral_conv3, mask2) statistic = self.statistic(spectral_conv3, mask3) output = self.fully_connected(statistic) if GENERATE_SAVE: # save all intermediary steps for debugging variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, filter_operator3, y3, spectral_conv3, mask1, mask2, mask3, statistic, output] variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'filter_operator3', 'y3', 'spectral_conv3', 'mask1', 'mask2', 'mask3', 'statistic', 'output'] tuples = zip(variables, variables_names) for v, n in tuples: # np.save(DEBUG_DIR_MNIST_012 + 'constant_weights/' + n + '_p', v.detach().numpy()) np.save(DEBUG_DIR_MNIST_012 + 'pretrained_weights/' + n + '_p_pw', v.detach().numpy()) return output class TIGraNet_mnist_rot(TIGraNet): - def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True): + def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False): TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights) # Main layers self.spectral_conv1 = SpectralConv( batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask ) self.dynamic_pool1 = DynamicPool( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask ) self.spectral_conv2 = SpectralConv( batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=4, laplacian_matrix=self.laplacian_matrix, mask=self.mask ) self.dynamic_pool2 = DynamicPool( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask ) self.statistic = Statistic( batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=13, shifted_laplacian_matrix=self.shifted_laplacian_matrix ) self.fully_connected = nn.Sequential( nn.Linear(in_features=560, out_features=500), nn.ReLU(inplace=True), nn.Linear(in_features=500, out_features=300), nn.ReLU(inplace=True), nn.Linear(in_features=300, out_features= 100), nn.ReLU(inplace=True), nn.Linear(in_features=100, out_features=9) ) if load_pretrained_weights: self.init_pretrained_weights(name='mnist_rot') # random checks assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_rot', 'parameters', 'alpha_1.npy'))))).all() assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_rot', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all() assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 
'mnist_rot', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        if GENERATE_SAVE:
            # save all intermediary steps for debugging
            variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, mask1, mask2, statistic, output]
            variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'mask1', 'mask2', 'statistic', 'output']
            tuples = zip(variables, variables_names)
            for v, n in tuples:
                np.save(DEBUG_DIR_MNIST_rot + n + '_p_pw', v.detach().numpy())

        return output


class TIGraNet_mnist_trans(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=7, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=7, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=11, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=480, out_features=500),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=500, out_features=300),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=300, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=9)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='mnist_trans')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters',
'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'mnist_trans', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        return output


class TIGraNet_eth80(TIGraNet):
-    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=True):
+    def __init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights=False, freeze_sc_weights=False):
        TIGraNet.__init__(self, dim, laplacian_matrix, shifted_laplacian_matrix, batch_size, learning_rate, load_pretrained_weights, freeze_sc_weights)

        # Main layers
        self.spectral_conv1 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=1, filter_size_out=10, degree_of_polynomial=5, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool1 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=10, num_active_nodes=600, mask=self.mask)
        self.spectral_conv2 = SpectralConv(batch_size=self.batch_size, num_nodes=self.num_nodes, filter_size_in=10, filter_size_out=20, degree_of_polynomial=5, laplacian_matrix=self.laplacian_matrix, mask=self.mask)
        self.dynamic_pool2 = DynamicPool(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, num_active_nodes=300, mask=self.mask)
        self.statistic = Statistic(batch_size=self.batch_size, num_nodes=self.num_nodes, num_filters=20, degree_of_polynomial=11, shifted_laplacian_matrix=self.shifted_laplacian_matrix)
        self.fully_connected = nn.Sequential(
            nn.Linear(in_features=480, out_features=500),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=500, out_features=300),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=300, out_features=100),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=100, out_features=8)
        )

        if load_pretrained_weights:
            self.init_pretrained_weights(name='eth80')
            # random checks
            assert (self.spectral_conv2.alpha.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'alpha_1.npy'))))).all()
            assert (self.spectral_conv2.beta.weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'beta_1.npy'))).unsqueeze(0))).all()
            assert (self.fully_connected[2].weight == nn.Parameter(torch.from_numpy(np.load(os.path.join(SAVED_DATA, 'eth80', 'parameters', 'W_2.npy'))).t())).all()
            logger.info('Loaded pretrained weights.')

        if freeze_sc_weights:
            # freeze the parameters of the spectral conv layer
            for m in self.modules():
                if isinstance(m, SpectralConv):
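                    # requires_grad=False removes these parameters from the update;
                    # the Adam constructor below filters on p.requires_grad accordingly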
                    m.alpha.weight.requires_grad = False
                    m.beta.weight.requires_grad = False
            logger.info('Froze spectral conv weights.')

        self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, self.parameters()), lr=learning_rate)
        logger.info('Loaded {} optimizer.'.format(type(self.optimizer).__name__))

    def forward(self, input):
        prepared_input = self.prepare_input(input)
        filter_operator1, y1, spectral_conv1 = self.spectral_conv1(prepared_input, self.mask)
        mask1, dynamic_pool1 = self.dynamic_pool1(spectral_conv1, self.mask)
        filter_operator2, y2, spectral_conv2 = self.spectral_conv2(spectral_conv1, mask1)
        mask2, dynamic_pool2 = self.dynamic_pool2(spectral_conv2, mask1)
        statistic = self.statistic(spectral_conv2, mask2)
        output = self.fully_connected(statistic)

        if GENERATE_SAVE:
            # save all intermediary steps for debugging
            variables = [prepared_input, filter_operator1, y1, spectral_conv1, filter_operator2, y2, spectral_conv2, mask1, mask2, statistic, output]
            variables_names = ['prepared_input', 'filter_operator1', 'y1', 'spectral_conv1', 'filter_operator2', 'y2', 'spectral_conv2', 'mask1', 'mask2', 'statistic', 'output']
            tuples = zip(variables, variables_names)
            for v, n in tuples:
                np.save(DEBUG_DIR_ETH80 + n + '_p_pw', v.detach().numpy())

        return output
\ No newline at end of file
diff --git a/numerical_instability_example.py b/numerical_instability_example.py
index 3f88a85..9a5ed24 100644
--- a/numerical_instability_example.py
+++ b/numerical_instability_example.py
@@ -1,37 +1,61 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Illustrates numerical instability for similar functions in Theano and PyTorch.
"""

import torch
import numpy as np
import theano
import theano.tensor as T
import os

from paths import *

torch.manual_seed(7)

def func_pytorch(a, b):
    a = a.permute(0,2,1).contiguous().view(-1,400)
    b = b.view(400,-1)
    return torch.matmul(a, b).view(100,5,400,10).permute(0,2,3,1)

def func_theano(a, b):
    return T.tensordot(a, b, [[2], [2]])

+def func_numpy(a,b):
+    return np.tensordot(a,b, [[2],[2]])
+
a = torch.randn(100,400,5)
b = torch.randn(400,400,10)
a_p = a.permute(0,2,1)
b_p = b.permute(2,1,0)

-out = func_pytorch(a, b)
+out_pytorch = func_pytorch(a, b)
+out_numpy = np.transpose(func_numpy(a_p,b_p), (0,3,2,1))
out_true = np.transpose(func_theano(a_p, b_p).eval(), (0,3,2,1))

-np.testing.assert_allclose(actual=out, desired=out_true, rtol=1e-6)
+
+
+# from debug import plot_pytorch_theano_image, plot_pytorch_theano_image_diff
+# from path import *
+
+# print(out.shape, out_true[0,:,0,0].shape)
+
+# plot_pytorch_theano_image(
+#     images=[out[8,:,2,3].numpy(), out_true[8,:,2,3]],
+#     dir=DEBUG_DIR_MNIST_012 + 'constant_weights/',
+#     name='temp'
+# )
+
+# plot_pytorch_theano_image_diff(
+#     images=[out[8,:,2,3].numpy(), out_true[8,:,2,3]],
+#     dir=DEBUG_DIR_MNIST_012 + 'constant_weights/',
+#     name='temp_diff'
+# )
+
+np.testing.assert_allclose(actual=out_numpy, desired=out_true, rtol=1e-7)   # OK
+np.testing.assert_allclose(actual=out_pytorch, desired=out_true, rtol=1e-7) # 76% mismatch
diff --git a/plot.py b/plot.py
index f139ac5..f1a6638 100644
--- a/plot.py
+++ b/plot.py
@@ -1,171 +1,113 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Plot functions to create figures.
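A typical call site for the `colorbar` helper defined below, with made-up data
(the helper keeps the bar the same height as the image):

    import numpy as np
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    im = ax.imshow(np.random.rand(26, 26), cmap='jet')
    colorbar(im)
    plt.show()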
""" import matplotlib.pyplot as plt from mpl_toolkits.axes_grid1 import make_axes_locatable import numpy as np import torch.nn as nn from paths import FIGURES_DIR, DEBUG_DIR_MNIST_012 def plot_loss_and_acc(run_time, run_name, history): """Generate a plot with training loss and validation accuracy for a specific model.""" num_epochs = list(range(len(history))) loss = [t[0] for t in history] acc = [t[1] for t in history] fig, ax1 = plt.subplots() ax2 = ax1.twinx() r1 = ax1.plot(num_epochs, loss, color='red', label='training loss') ax1.set_xlabel('epoch') ax1.set_ylabel('loss') r2 = ax2.plot(num_epochs, acc, color='blue', label='validation accuracy') ax2.set_ylabel('accuracy') lns = r2 + r1 labs = [l.get_label() for l in lns] leg = plt.legend(lns, labs, loc='center right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_loss_and_acc_results.png') plt.gcf().clear() def plot_loss(run_time, run_name, history): """Generate a plot with training and validation loss for the given history.""" num_epochs = list(range(len(history))) train_loss = [t[0] for t in history] valid_loss = [t[1] for t in history] fig, ax1 = plt.subplots() train_curve = ax1.plot(num_epochs, train_loss, color='red', label='training') valid_curve = ax1.plot(num_epochs, valid_loss, color='blue', label='validation') ax1.set_xlabel('epoch') ax1.set_ylabel('loss') leg = plt.legend(loc='upper right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_losses.png') plt.gcf().clear() def plot_error(run_time, run_name, history): """Generates a plot with training validation errors for the given history.""" num_epochs = list(range(len(history))) train_errors = [t[0] for t in history] valid_errors = [t[1] for t in history] fig, ax1 = plt.subplots() train_curve = ax1.plot(num_epochs, train_errors, color='red', label='training') valid_curve = ax1.plot(num_epochs, valid_errors, color='blue', label='validation') ax1.set_xlabel('epoch') ax1.set_ylabel('error %') leg = plt.legend(loc='upper right', shadow=True) leg.draw_frame(False) plt.savefig(FIGURES_DIR + run_time + '_' + run_name + '_errors.png') plt.gcf().clear() def colorbar(mappable): """Create a colorbar that matches properly the size of the image.""" ax = mappable.axes fig = ax.figure divider = make_axes_locatable(ax) cax = divider.append_axes("right", size="5%", pad=0.05) return fig.colorbar(mappable, cax=cax) -def show_filter(tensor, filter_name, num_filters, num_nodes, without_batch=False): - """Show the filters of the intermediary layers given by the tensor.""" +def show_tensor(image, filter_name, dim, num_nodes): + """Show the image given by the tensor.""" - dim = int(np.sqrt(num_nodes)) - - if not without_batch: - # select the first batch - tensor = tensor.data[0] - else: - tensor = tensor.data - - # keep only 3 filters in order to keep good quality in visualization - # if num_filters > 3: - # num_filters=3 - - # create the figure containing all the filters - fig = plt.figure(figsize=(15,5)) - for idx in range(num_filters): - ax = fig.add_subplot(1, num_filters, idx+1) # this line adds sub-axes - im = ax.imshow(tensor[:,idx].contiguous().view(dim,dim), cmap='jet') - #ax.set_axis_off() - #cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7]) - #colorbar(im) + fig = plt.figure(figsize=(5,5)) + ax = fig.add_subplot(1, 1, 1) + im = ax.imshow(image.view(dim,dim), cmap='jet') # add suptitle and display the plot plt.suptitle(filter_name) plt.tight_layout(h_pad=1) plt.show() -def show_filters_single_window(list, h, 
w, num_nodes, with_relu=False): - """Show the filters of the intermediary layers given by the tensor.""" - - dim = int(np.sqrt(num_nodes)) - - relu = nn.ReLU(inplace=True) - - # create the figure containing all the filters - fig = plt.figure(figsize=(15,10)) - for i in range(len(list)): - for j in range(3): - ax = fig.add_subplot(h, w, i*3 + j+1) # this line adds sub-axes - if with_relu: - im = ax.imshow(relu(list[i]).detach()[0, :, j].contiguous().view(dim,dim), cmap='jet') - else: - im = ax.imshow(list[i].detach()[0, :, j].contiguous().view(dim,dim), cmap='jet') - ax.set_axis_off() - colorbar(im) - - # add suptitle and display the plot - #plt.suptitle(filter_name) - plt.tight_layout(h_pad=1) - plt.show() - -def show_cheb_poly_single_window(cheb_poly, num_filters, num_nodes): +def show_cheb_poly_tensor(cheb_poly, filter_name, num_filters, dim, num_nodes): """Show chebyshev polynomial of the intermediary layers given by the tensor.""" - dim = int(np.sqrt(num_nodes)) - - #cheb_poly shape : B x F x N x D - # create the figure containing all the filters fig = plt.figure(figsize=(15,10)) for i in range(num_filters): for j in range(10): ax = fig.add_subplot(num_filters, 10, i*10 + j+1) # this line adds sub-axes im = ax.imshow(cheb_poly.detach()[1, i, :, j].contiguous().view(dim,dim), cmap='jet') - #ax.set_axis_off() - #colorbar(im) # add suptitle and display the plot - #plt.suptitle(filter_name) + plt.suptitle(filter_name) plt.tight_layout(h_pad=1) - plt.show() - -def plot_input(input, dim, index, num): - """Plot the input images.""" - - fig = plt.figure(figsize=(15,10)) - for i in range(num): - for j in range(num): - ax = fig.add_subplot(num, num, i*num + j+1) - im = ax.imshow(input.data[index + (i*num)+j].contiguous().view(dim,dim), cmap='jet') - plt.show() \ No newline at end of file diff --git a/saved_datasets.py b/saved_datasets.py index 63f95e8..463b58b 100644 --- a/saved_datasets.py +++ b/saved_datasets.py @@ -1,56 +1,62 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Saved datasets module. 
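Expected usage of `load_saved_dataset` below, assuming the exported .npy files
for the chosen dataset exist under SAVED_DATA (shapes are an assumption based on
how the loaders are consumed elsewhere in the repo):

    train_loader, valid_loader, test_loader, dim, lap, shifted_lap = load_saved_dataset(name='mnist_012')
    signals, labels = next(iter(train_loader))  # signals: BATCH_SIZE x dim*dim
    assert lap.shape == (dim**2, dim**2)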
""" import numpy as np import logging import os import torch from torch.utils.data import TensorDataset, DataLoader from paths import SAVED_DATA from configuration import * from graph import shift_laplacian +from utils import get_dim, count_class_freq logger = logging.getLogger(__name__) -def get_dim(data): - """Get the dimension of the input image.""" - dim = len(data[0]) - return dim - def load_saved_dataset(name, data_path=SAVED_DATA): """Load the saved data.""" train_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_train_signals.npy'))).float() valid_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_val_signals.npy'))).float() test_data = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_test_signals.npy'))).float() train_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_train_labels.npy'))).long() valid_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_val_labels.npy'))).long() test_labels = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_test_labels.npy'))).long() train_dataset = TensorDataset(train_data, train_labels) valid_dataset = TensorDataset(valid_data, valid_labels) test_dataset = TensorDataset(test_data, test_labels) train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True) valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, drop_last=True) test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, drop_last=True) dim = int(np.sqrt(get_dim(train_data))) laplacian_matrix = torch.from_numpy(np.load(os.path.join(data_path, name, 'dataset', name + '_laplacian.npy'))).float() shifted_laplacian_matrix = shift_laplacian(laplacian_matrix, dim).to(DEVICE) - logger.info('Loaded dataset from the saved {} dataset.'.format(name)) + if name == 'mnist_012': + num_classes = 3 + elif name == 'eth80': + num_classes = 8 + else: + num_classes = 9 + + logger.info('Class frequency \ntrain loader: {} \nvalidation loader: {} \ntest loader: {}'.format( + count_class_freq(train_loader, num_classes),count_class_freq(valid_loader, num_classes), count_class_freq(test_loader, num_classes)) + ) + logging.info('Loaded saved {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'.format(name, len(train_loader)*BATCH_SIZE, len(valid_loader)*BATCH_SIZE, len(test_loader)*BATCH_SIZE)) return train_loader, valid_loader, test_loader, dim, laplacian_matrix, shifted_laplacian_matrix diff --git a/train.py b/train.py index 1ee0cf1..c8a1f87 100644 --- a/train.py +++ b/train.py @@ -1,150 +1,151 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ Main module. 
""" import numpy as np import datetime import logging from tqdm import tqdm import time import sys from torch.autograd import Variable from datasets import load_dataset from saved_datasets import load_saved_dataset from graph import compute_laplacians from utils import snapshot, load_pretrained_model from plot import plot_loss, plot_error from paths import SAVED_MODELS_DIR from configuration import * from models import * logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def load_model(dataset_name, dim, laplacian_matrix, shifted_laplacian_matrix): """Load the model associated with the dataset.""" if dataset_name == 'mnist_012': model = TIGraNet_mnist_012( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'mnist_rot': model = TIGraNet_mnist_rot( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'mnist_trans': model = TIGraNet_mnist_trans( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) elif dataset_name == 'eth80': model = TIGraNet_eth80( dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE, freeze_sc_weights=True ) return model # get arguments from command line if len(sys.argv) != 2: print('Usage: python3 train.py [DATASET]') sys.exit(1) else: dataset_name = sys.argv[-1] if dataset_name not in ['mnist_012', 'mnist_rot', 'mnist_trans', 'eth80']: print('DATASET available: mnist_012, mnist_rot, mnist_trans or eth80') sys.exit(1) # prepare data and model train_loader, valid_loader, _, dim, laplacian_matrix, shifted_laplacian_matrix = load_saved_dataset(name=dataset_name) model = load_model(dataset_name=dataset_name, dim=dim, laplacian_matrix=laplacian_matrix, shifted_laplacian_matrix=shifted_laplacian_matrix) # pass it to GPU if available model.to(DEVICE) logging.info('Training...') RUN_TIME = '{:%Y-%m-%d_%H-%M}'.format(datetime.datetime.now()) RUN_NAME = '{}_{}_{}_{:.0e}'.format( type(model).__name__, type(model.optimizer).__name__, #'F' if model.freeze_sc_weights else 'NF', BATCH_SIZE, LEARNING_RATE ) epoch = 0 best_error = (0,100) loss_history = [] error_history = [] while True: # train the model loss_train = 0 acc_train = 0 for data, target in tqdm(train_loader, desc='Training', leave=False): data, target = data.to(DEVICE), target.to(DEVICE) loss = model.step(data, target, train=True) loss_train += loss y_pred = model.predict(data) acc_train += torch.eq(y_pred.cpu(),target.cpu()).sum().item() + # validate the model loss_valid = 0 acc_valid = 0 for data, target in tqdm(valid_loader, desc='Validation', leave=False): - data = data.to(DEVICE) + data, target = data.to(DEVICE), target.to(DEVICE) loss = model.step(data, target, train=False) loss_valid += loss y_pred = model.predict(data) acc_valid += torch.eq(y_pred.cpu(),target.cpu()).sum().item() # print some metrics train_samples_size = len(train_loader) * BATCH_SIZE valid_samples_size = len(valid_loader) * BATCH_SIZE loss_train_epoch = loss_train / train_samples_size loss_valid_epoch = loss_valid / valid_samples_size error_train_epoch = 100 - 100 * (acc_train / train_samples_size) error_valid_epoch = 100 - 100 * (acc_valid 
/ valid_samples_size)
    error_history.append((error_train_epoch, error_valid_epoch))
    loss_history.append((loss_train_epoch, loss_valid_epoch))
    print('Epoch: {} train loss: {:.5f} valid loss: {:.5f} train error: {:.2f} % valid error: {:.2f} %'.format(epoch, loss_train_epoch, loss_valid_epoch, error_train_epoch, error_valid_epoch))

    # check if model is better
    if error_valid_epoch < best_error[1]:
        best_error = (epoch, error_valid_epoch)
-        snapshot(SAVED_MODELS_DIR, RUN_TIME, RUN_NAME, True, epoch, error_valid_epoch, model.state_dict())
+        snapshot(SAVED_MODELS_DIR, RUN_TIME, RUN_NAME, True, epoch, error_valid_epoch, model.state_dict(), model.optimizer.state_dict())

    # check that the model is not getting worse over time
    if best_error[0] + PATIENCE < epoch:
        print('Overfitting. Stopped at epoch {}.'.format(epoch))
        break

    epoch += 1

plot_loss(RUN_TIME, RUN_NAME, loss_history)
plot_error(RUN_TIME, RUN_NAME, error_history)
diff --git a/utils.py b/utils.py
index 37cb988..3b069af 100644
--- a/utils.py
+++ b/utils.py
@@ -1,154 +1,156 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Utility functions.
"""

import numpy as np
import matplotlib.pyplot as plt
import random
import glob
import logging
import torch
import torchvision
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

from configuration import *

random.seed(SEED)
logger = logging.getLogger(__name__)

def select(dataset, size, digits_to_keep, stratified_sampling=False):
    """Select randomly specific elements given by digits_to_keep."""

    len_dataset = len(dataset)
    indices = list(range(len_dataset))
    random_select_indices = []
    random.shuffle(indices)

    if stratified_sampling:
        num_classes = len(digits_to_keep)
        classes = [[] for _ in range(num_classes)]
        for i in indices:
            if dataset[i][1] in digits_to_keep:
                classe = dataset[i][1]
                classes[classe].append(i)

        for i in range(np.min([len(classes[0]), len(classes[1]), len(classes[2])])):
            for j in range(num_classes):
                if len(random_select_indices) < size:
                    random_select_indices.append(classes[j][i])
                else:
                    break
    else:
        for i in indices:
            if len(random_select_indices) < size and dataset[i][1] in digits_to_keep:
                random_select_indices.append(i)

    return random_select_indices

def train_valid_split(dataset, train_size):
    """Split the dataset into training and validation set."""

    len_dataset = len(dataset)
    indices = list(range(len_dataset))
    train_indices = indices[:train_size]
    valid_indices = indices[train_size:]

    return SubsetRandomSampler(train_indices), SubsetRandomSampler(valid_indices)

def train_valid_test_split(dataset, train_size, valid_size):
    """Split the dataset into training, validation and testing set."""

    indices = list(range(len(dataset)))
    random.shuffle(indices)
    train_indices, valid_indices, test_indices = indices[:train_size], indices[train_size:train_size+valid_size], indices[train_size+valid_size:]

    return SubsetRandomSampler(train_indices), SubsetRandomSampler(valid_indices), SubsetRandomSampler(test_indices)

def imshow_data_loader(data_loader, eth80_class_names=[]):
    """Show image provided by the data loader."""

    # get a batch of data
    inputs, classes = next(iter(data_loader))
    out = torchvision.utils.make_grid(tensor=inputs)

    # get the corresponding values
    if eth80_class_names:
        title = [eth80_class_names[x] for x in classes]
        mean = ETH80_MEAN
        std = ETH80_STD
    else:
        title = [x for x in classes]
        mean = MNIST_MEAN
        std = MNIST_STD

    # build the original image
    out = out.numpy().transpose((1, 2, 0))
    out = std * out + mean
    out = np.clip(out, 0, 1)

    # display it
    plt.imshow(out)
    plt.title(title)
    plt.show()

def show_spectrum(tensor, num_filters):
    """Show the spectrum of the spectral layer."""

    return NotImplemented

-def snapshot(saved_model_dir, run_time, run_name, is_best, epoch, err_epoch, state_dict):
-    """Save the model state."""
-
-    # Write the full name
-    if is_best:
-        complete_name = '{}{}_{}_{}_{:.2f}_best'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
-    else:
-        complete_name = '{}{}_{}_{}_{:.2f}'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
-
+def snapshot(saved_model_dir, run_time, run_name, is_best, epoch, err_epoch, model_state_dict, optim_state_dict):
+    """Save the model state."""
+
+    complete_name = '{}{}_{}_{}_{:.2f}'.format(saved_model_dir, run_time, run_name, epoch, err_epoch)
+
+    states = {
+        'model': model_state_dict,
+        'optimizer': optim_state_dict
+    }
+
    # Save the model
-    with open(complete_name + '.pt', 'wb') as f:
-        torch.save(state_dict, f)
+    with open(complete_name + '.pt', 'wb') as f:
+        torch.save(states, f)

def load_pretrained_model(saved_model_dir, run_name, model):
    """Load the specified model."""

-    model_state = glob.glob(saved_model_dir + run_name)[0]
+    states = glob.glob(saved_model_dir + run_name)[0]
    if torch.cuda.is_available():
-        model.load_state_dict(torch.load(model_state))
-        model.cuda()
+        checkpoint = torch.load(states)
    else:
-        state_dict = torch.load(model_state, map_location=lambda storage, loc: storage)
+        checkpoint = torch.load(states, map_location=lambda storage, loc: storage)

-    # in case we load state_dict with different architecture (subset)
-    from collections import OrderedDict
-    new_state_dict = OrderedDict()
-    params_name = []
-    for name, _ in model.named_parameters():
-        params_name.append(name)
-
-    for k, v in state_dict.items():
-        if k in params_name:
-            new_state_dict[k] = v
-
-    # load params
-    model.load_state_dict(new_state_dict)
-    #model.load_state_dict(torch.load(model_state, map_location=lambda storage, loc: storage))
+    model.load_state_dict(checkpoint['model'])
+    model.optimizer.load_state_dict(checkpoint['optimizer'])

    logging.info('Loaded {} model.'.format(run_name))

    return model

def init_mask(num_nodes, batch_size):
    """Initialize the nodes of interest by including all the nodes of the graph."""

    mask = Variable(torch.ones(batch_size, num_nodes, 1)).to(DEVICE)

-    return mask
\ No newline at end of file
+    return mask
+
+def count_class_freq(loader, num_classes):
+    """Return the frequency for each class from the loader."""
+
+    t = np.zeros(num_classes)
+    for _, target in loader:
+        for c in target:
+            t[c] += 1
+    return t
+
+def get_dim(data):
+    """Get the flattened dimension of one input sample."""
+    dim = len(data[0])
+    return dim
\ No newline at end of file
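For reference, a self-contained sketch of how the two relocated helpers at the
end of utils.py behave; the tensors here are made up:

    import torch
    from torch.utils.data import DataLoader, TensorDataset

    signals = torch.randn(10, 784)  # ten flattened 28x28 images
    labels = torch.randint(0, 3, (10,))
    loader = DataLoader(TensorDataset(signals, labels), batch_size=5)

    print(count_class_freq(loader, num_classes=3))  # e.g. [4. 3. 3.]
    print(get_dim(signals))                         # 784, i.e. dim**2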