diff --git a/README.md b/README.md new file mode 100644 index 0000000..3a29c98 --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# weightmatrices creation for initialising the first layer of a neural network + +## Dependencies & Environment + +- Conda needs to be installed +- Set up the conda environment `weightmatrices` by running: + + ```bash + bash setup_dependencies.sh + ``` + +## Usage + +- To run the main script(s) that produce the weight matrices run: + + ``` bash + bash run.sh + ``` + If you want to run specific scripts, do not forget to activate the environment by running: + + ``` bash + conda activate weightmatrices + ``` + +## Remarks + +By default, datasets are downloaded and put into a `./datasets` directory (relative to the working directory). +To avoid this you can: +1. Change the root variable in the torchvision data loader function. +2. Create the `./datasets` folder and corresponding subfolder and put the datasets (or a link) in there. diff --git a/env.yml b/env.yml new file mode 100644 index 0000000..6366757 --- /dev/null +++ b/env.yml @@ -0,0 +1,49 @@ +name: weightmatrices +channels: + - pytorch + - brian-team + - defaults +dependencies: + - blas=1.0=mkl + - ca-certificates=2020.1.1=0 + - certifi=2020.4.5.2=py37_0 + - freetype=2.10.2=ha233b18_0 + - intel-openmp=2019.4=233 + - joblib=0.15.1=py_0 + - jpeg=9b=he5867d9_2 + - libcxx=10.0.0=1 + - libedit=3.1.20191231=haf1e3a3_0 + - libffi=3.3=h0a44026_1 + - libgfortran=3.0.1=h93005f0_2 + - libpng=1.6.37=ha441bb4_0 + - libtiff=4.1.0=hcb84e12_1 + - llvm-openmp=10.0.0=h28b9765_0 + - lz4-c=1.9.2=h0a44026_0 + - mkl=2019.4=233 + - mkl-service=2.3.0=py37hfbe908c_0 + - mkl_fft=1.1.0=py37hc64f4ea_0 + - mkl_random=1.1.1=py37h959d312_0 + - ncurses=6.2=h0a44026_1 + - ninja=1.9.0=py37h04f5b5a_0 + - numpy=1.18.1=py37h7241aed_0 + - numpy-base=1.18.1=py37h3304bdc_1 + - olefile=0.46=py37_0 + - openssl=1.1.1g=h1de35cc_0 + - pillow=7.1.2=py37h4655f20_0 + - pip=20.1.1=py37_1 + - python=3.7.7=hf48f09d_4 + - pytorch=1.5.1=py3.7_0 + - readline=8.0=h1de35cc_0 + - 
"""Create a PCA weight matrix, preview one component, save it, then go interactive.

Steps, in order:
1. Load the data via ``utils.load_data()`` (called with its defaults).
2. Compute the PCA weight matrix ``W`` for ``n_h`` hidden neurons.
3. Show one randomly chosen row of ``W`` as a grayscale image.
4. Save ``W`` to ``./results/pca_test.npy``.
5. Drop into an interactive console with all script variables available.
"""
import code
import os
import random

import matplotlib
matplotlib.use('TkAgg')  # the backend has to be selected before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
import torch

# own package
from weightmatrices.algos import pca
from weightmatrices.utils import utils

n_h = 100  # number of hidden neurons; replace by list of n_hs
results_dir = './results'

################################################################################

data_loader = utils.load_data()

W = pca.get_weightmatrices_pca(data_loader, n_h)

# Preview a random component. The image shape is taken from the dataset
# (last two dimensions of its data tensor) instead of a hard-coded 28x28,
# so the preview keeps working if the input resolution changes.
image_shape = tuple(data_loader.dataset.data.shape[-2:])
component = W[random.randrange(n_h), :]
plt.imshow(component.reshape(image_shape), cmap='gray')
plt.show()

################################################################################

# exist_ok avoids the check-then-create race of `os.path.exists` + `os.mkdir`.
os.makedirs(results_dir, exist_ok=True)

np.save(os.path.join(results_dir, 'pca_test.npy'), W)

# jump to interactive mode
code.interact(local=locals())
# --- weightmatrices/algos/pca.py ---------------------------------------------

def get_pca_trafo_matrix(data_loader, n_h):
    """Fit an incremental PCA batch by batch and return its components.

    Args:
        data_loader: Iterable yielding ``(data, target)`` pairs. ``data`` must
            expose ``.shape`` and ``.numpy()``; its last two dimensions are
            flattened into the feature axis.
        n_h: Number of principal components to keep.

    Returns:
        The ``components_`` attribute of the fitted ``IncrementalPCA``.
    """
    transformer = IncrementalPCA(n_components=n_h)
    for batch, _ in tqdm(data_loader):
        shape = batch.shape
        # Use the batch's own length rather than `data_loader.batch_size`:
        # the latter is wrong for a short final batch (drop_last=False) and
        # is None for loaders built with a batch sampler.
        flat = batch.numpy().reshape(shape[0], shape[-1] * shape[-2])
        transformer.partial_fit(flat)

    return transformer.components_


def get_weightmatrices_pca(data_loader, n_h):
    """Return a row-normalised PCA weight matrix for ``n_h`` hidden neurons.

    Args:
        data_loader: Loader whose ``dataset.data.shape`` ends in the two image
            dimensions; it is also iterated to fit the PCA.
        n_h: Number of hidden neurons, i.e. requested principal components.

    Returns:
        The PCA components after ``utils.normalise_weightmatrix``.

    Raises:
        ValueError: If ``n_h`` exceeds the input dimensionality.
    """
    print(f"Creating weight matrix for PCA for {n_h} hidden neurons...")
    shape = data_loader.dataset.data.shape
    n_in_features = shape[-1] * shape[-2]
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if n_h > n_in_features:
        raise ValueError(
            "Number of requested principal components higher than input dimensionality!"
        )

    W = get_pca_trafo_matrix(data_loader, n_h)
    return utils.normalise_weightmatrix(W)


# --- weightmatrices/utils/utils.py -------------------------------------------

def load_data(dataset='EMNIST', batch_size=100000):
    """Build a data loader for the requested dataset.

    Args:
        dataset: Name of the dataset; only ``'EMNIST'`` is supported.
        batch_size: Number of samples per batch.

    Returns:
        A ``torch.utils.data.DataLoader`` over the EMNIST 'bymerge' split
        stored under ``./datasets/`` (downloaded if missing), unshuffled and
        with the last incomplete batch dropped.

    Raises:
        ValueError: If ``dataset`` is not a supported name.
    """
    # Fail early with a clear message; previously an unknown name ended in an
    # unbound-local error further down.
    if dataset != 'EMNIST':
        raise ValueError(
            f"Unsupported dataset {dataset!r}; only 'EMNIST' is available."
        )

    # Convert images to tensors, then normalise with mean 0.5 and std 1.0.
    trans = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (1.0,)),
    ])
    data = datasets.EMNIST('./datasets/', 'bymerge', transform=trans, download=True)
    return torch.utils.data.DataLoader(
        dataset=data,
        batch_size=batch_size,
        shuffle=False,
        drop_last=True,
    )


def normalise_weightmatrix(W):
    """Return ``W`` passed through ``sklearn.preprocessing.normalize``.

    With the defaults used here this scales each row to unit L2 norm.
    """
    return normalize(W)