diff --git a/lab03/homework.ipynb b/lab03/homework.ipynb new file mode 100644 index 0000000..12130fb --- /dev/null +++ b/lab03/homework.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "EE-311\n", + "======\n", + "\n", + "Lab 3: logistic regression and gradient descent\n", + "----------------------------------------\n", + "\n", + "created by Francois Marelli on 05.03.2020" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Homework\n", + "\n", + "The file `homework.py` contains the homework of the week. It contains empty functions that must be completed according to the instructions.\n", + "\n", + "When the homework is completed, it must be submitted on Moodle for grading.\n", + "\n", + "**Do not change the function definitions in the file!**\n", + "\n", + "\n", + "## Logistic regression: gradient descent\n", + "\n", + "In the lab session, we have seen what a logistic regression model is, and how it can be used to solve binary classification problems. We have also seen how to use gradient descent algorithms to find numerical solutions to optimization problems.\n", + "\n", + "The objective of this homework is to manually implement and train a logistic regression model for binary classification.\n", + "\n", + "(Reminder: the logistic regression model is defined as $f: X \\rightarrow \\sigma(X \\vec{\\beta})$)\n", + "\n", + "The first task is to implement the model itself: given its parameters $\\vec{\\beta}$ and input data, compute the binary labels predicted by the model.\n", + "\n", + "The second task is to implement the gradient of the logistic loss function (carefully read the slides of course 03) so that the model can be trained using gradient descent (already provided in the homework). As explained in the lab, using an averaged loss is better in practice, so you are asked to compute the averaged gradient over the dataset.\n", + "\n", + "You should not edit the given gradient descent implementation.\n", + "\n", + "The following cell can be used for manual testing.\n", + "\n", + "**Hints:**\n", + "\n", + "Task 1:\n", + "\n", + "*Remember when we plotted the class probabilities? How do you choose the most probable class?* \n", + "\n", + "Task 2:\n", + "\n", + "*Do not forget to add a column of ones, just like in the lab.*\n", + "\n", + "*Pay attention to the sign of the derivative.*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import homework\n", + "\n", + "import importlib\n", + "importlib.reload(homework)\n", + "\n", + "# Test your functions!\n", + "#\n", + "# homework.logistic_loss_derivative(data_X, data_y, beta)\n", + "# homework.model_prediction(data_X, beta)\n", + "# homework.gradient_descent(data_X, data_y, beta_0, stepsize, iterates)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test cell\n", + "\n", + "Run this cell to check whether your homework passes the test." 
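Before running the test cell, here is a quick illustration of the first hint. This is a minimal, self-contained sketch and not the `model_prediction` implementation (which is left to the homework); the `scores` values below are made-up linear scores standing in for $X \vec{\beta}$, used only for illustration.

```python
import numpy as np


def sigmoid(z):
    # Logistic function: maps any real number to the interval (0, 1)
    return 1.0 / (1.0 + np.exp(-z))


# Made-up linear scores standing in for X @ beta (illustration only)
scores = np.array([-2.0, -0.3, 0.1, 1.5])

# Class probabilities P(Y=1 | X) given by the logistic function
probabilities = sigmoid(scores)

# Most probable class: predict 1 whenever P(Y=1 | X) > 0.5, otherwise 0
labels = (probabilities > 0.5).astype(int)

print(probabilities)  # approximately [0.12 0.43 0.52 0.82]
print(labels)         # [0 0 1 1]
```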
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import unittest\n", + "import testing\n", + "\n", + "import importlib\n", + "importlib.reload(testing)\n", + "\n", + "unittest.main(module=testing, argv=['first-arg-is-ignored'], exit=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/lab03/homework.py b/lab03/homework.py new file mode 100644 index 0000000..2f06d48 --- /dev/null +++ b/lab03/homework.py @@ -0,0 +1,70 @@ +""" +EE-311 +====== + +Lab 3: logistic regression and gradient descent +----------------------------------------------- + +created by Francois Marelli on 05.03.2020 +""" + +import numpy as np + + +def model_prediction(data_X, beta): + """ + Compute the predicted labels for the points in data_X using a logistic regression model + + Parameters: + data_X : ndarray of shape (N, M) with N the number of points and M the number of features + beta: ndarray of shape (M+1) containing the model parameters, beta[0] being the independent term + + Returns: + labels: an array of shape (N) containing the model class predictions (0/1) + """ + + return + + +def logistic_loss_derivative(data_X, data_y, beta): + """ + Compute the gradient of the averaged logistic loss function for a logistic regression model + + Parameters: + data_X : ndarray of shape (N, M) with N the number of points and M the number of features + data_y: ndarray of shape (N) containing the labels (0/1) + beta: ndarray of shape (M+1) containing the model parameters, beta[0] being the independent term + + Returns: + grad: an array of shape (M+1) containing the gradient of the logistic loss + function with respect to beta averaged over data_X + """ + + return + + +def gradient_descent(data_X, data_y, beta_0, stepsize, iterates): + """ + A gradient descent algorithm, returns the trained model + + DO NOT EDIT + + Parameters: + data_X : ndarray of shape (N, M) with N the number of points and M the number of features + data_y: ndarray of shape (N) containing the labels (0/1) + beta_0: ndarray of shape (M+1) containing the initial model parameters, beta_0[0] being the independent term + stepsize: float, the step size of the gradient descent + iterates: number of iterations to perform + + Returns: + beta: ndarray of shape (M+1) containing the trained model parameters, beta[0] being the independent term + + """ + + beta = beta_0 + + for _ in range(iterates): + dloss = logistic_loss_derivative(data_X, data_y, beta) + beta -= dloss * stepsize + + return beta diff --git a/lab03/lab.ipynb b/lab03/lab.ipynb new file mode 100644 index 0000000..3e3f394 --- /dev/null +++ b/lab03/lab.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "EE-311\n", + "======\n", + "\n", + "Lab 3: logistic regression and gradient descent\n", + "----------------------------------------\n", + "\n", + "created by Francois Marelli on 05.03.2020" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Import libraries\n", + "\n", + "*The seaborn library is a high-level plotting library based on Matplotlib, used
for statistical data visualization*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import load_iris, load_wine\n", + "from sklearn.linear_model import LogisticRegression, LinearRegression\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load and prepare data\n", + "\n", + "For this lab, we will use the iris dataset. Each sample describes an iris flower using 4 features (width and length of sepals and petals).\n", + "\n", + "The original dataset contains 3 classes (types of irises) with 50 samples each, but we want to train a binary classifier. We will remove the last class for this session, keeping 100 samples.\n", + "\n", + "This time, we use `train_test_split` from `sklearn` to generate train and test subsets. As always, it is important to start by getting to know the properties of the dataset we work on. Answer the following:\n", + "\n", + "1. Is the dataset balanced?\n", + "2. How about the train and test splits?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_X, data_y = load_iris(return_X_y=True)\n", + "\n", + "# Reduce the dataset to a binary problem\n", + "mask = (data_y == 0) | (data_y == 1)\n", + "\n", + "data_X = data_X[mask]\n", + "data_y = data_y[mask]\n", + "\n", + "# Split into train and test subsets randomly, using 15% for test\n", + "X_train, X_test, y_train, y_test = train_test_split(data_X, data_y,\n", + " test_size=0.15, random_state=0)\n", + "\n", + "\n", + "###############################################################\n", + "# Code here\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Some utility functions\n", + "\n", + "We provide this function to easily plot the class densities of our dataset. We will use it throughout this lab." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_density(dataset, labels):\n", + " classes = (dataset[labels == 0], dataset[labels == 1])\n", + "\n", + " sns.displot(classes, fill=True, kind='kde')\n", + "\n", + " plt.title('Density of the classes')\n", + " plt.xlabel('Data X')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1D binary classification\n", + "\n", + "We want to be able to predict the class of a sample based on its features. This is a binary classification problem, and we will solve it using a logistic regression model.\n", + "\n", + "Let us first consider only the first dimension of the data using `X[:, 0]`.\n", + "\n", + "From the density plot, try to guess whether the logistic regression classifier will achieve a high accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_density(data_X[:, 0], data_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the classifier and check the performance\n", + "\n", + "Which performance metric does the `score` method of a logistic regression model compute?\n", + "\n", + "Implement it on your own and check that the results match."
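Assuming the metric in question is classification accuracy (the fraction of correctly predicted test labels), a manual check could look like the sketch below. It is self-contained and mirrors the lab setup; the variable names are local to the sketch, and the lab cell that follows defines its own.

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Same binary iris setup as in the lab, restricted to the first feature
X, y = load_iris(return_X_y=True)
mask = (y == 0) | (y == 1)
X, y = X[mask], y[mask]
X = X[:, 0, np.newaxis]

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.15, random_state=0)

clf = LogisticRegression().fit(X_tr, y_tr)

# Accuracy by hand: fraction of test samples whose prediction matches the true label
manual_accuracy = np.mean(clf.predict(X_te) == y_te)

print(manual_accuracy, clf.score(X_te, y_te))  # the two values should match
```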
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_1D = X_train[:, 0, np.newaxis]\n", + "X_test_1D = X_test[:, 0, np.newaxis]\n", + "\n", + "classifier = LogisticRegression().fit(X_train_1D, y_train)\n", + "\n", + "accuracy = classifier.score(X_test_1D, y_test)\n", + "print('Classification accuracy: {:.2f}%'.format(accuracy * 100))\n", + "\n", + "############################################\n", + "# Code here to compute the accuracy\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's happening inside?\n", + "\n", + "Here is a plot of the class probabilities predicted by the model (obtained with `predict_proba`). It is what the model uses to make a decision between the two classes (note that the sum of the probabilities for both classes is always 1).\n", + "\n", + "Code the equations of the logistic regression to reproduce the plot below using the parameters of the trained model. Use `plt.plot(x, y, '*')` for better readability. Remember that a logistic regression is just a linear regression on which we apply the logistic function!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_plot = np.arange(4, 8, 0.1)[..., np.newaxis]\n", + "proba = classifier.predict_proba(X_plot)\n", + "\n", + "plt.figure(figsize=(10,7))\n", + "\n", + "###################################################\n", + "# Add your code here!\n", + "\n", + "\n", + "\n", + "###################################################\n", + "\n", + "plt.plot(X_plot, proba, label='P(Y=.|X), model')\n", + "plt.title('Predicted probability of each class')\n", + "plt.xlabel('Data X')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The decision boundary\n", + "\n", + "The decision boundary of a model is the hypersurface that separates the two classes for decision making, the frontier at which the predicted label switches from 0 to 1.\n", + "\n", + "1. For a logistic regression, what specific type of hypersurface is the decision boundary?\n", + "\n", + "2. In this simple example, what is the decision boundary?\n", + "\n", + "3. Compute and plot the decision boundary on the densities using `plt.axvline`. How do you interpret it?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_density(data_X[:, 0], data_y)\n", + "\n", + "\n", + "##########################################################\n", + "# Add your code to compute and plot the decision boundary\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Other features of X\n", + "\n", + "Let us now work on the second feature `X[:, 1]` instead (still a 1D problem).\n", + "\n", + "1. Plot the densities. Do you think the model is going to perform better than when trained on the first feature? Why?\nTrain the model and print the accuracy to check.\n", + "\n", + "2. How about the third feature `X[:, 2]`? Repeat the steps." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Time to code!\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 2D binary classification\n", + "\n", + "We saw that the data are not perfectly separable when using either of the first two features, and that the logistic regression model could not achieve 100% accuracy.
What if we use both simultaneously?\n", + "\n", + "Here is a scatter plot of our dataset with 2 features.\n", + "\n", + "1. What do you think about its separability? What performance should we expect for the classifier?\n", + "\n", + "2. In this case, what is the decision boundary going to be?\n", + "\n", + "Train a logistic regression model on the 2D data, print its accuracy and plot its decision boundary on the scatter plot." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 7))\n", + "plt.scatter(data_X[:, 0], data_X[:, 1], c=data_y)\n", + "plt.xlabel('Feature 0')\n", + "plt.ylabel('Feature 1')\n", + "\n", + "# Keep the first 2 features of the dataset\n", + "X_train_2D = X_train[:, 0:2]\n", + "X_test_2D = X_test[:, 0:2]\n", + "\n", + "###############################################\n", + "# Here you go!\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Gradient descent\n", + "\n", + "Last time, we computed the analytical solution of the linear regression (with L2 loss) problem $y = \\beta_0 + \\beta_1 X$.\n", + "\n", + "Even though we know the analytical solution of the problem, we can still use a gradient descent method to compute it (for demonstration purposes).\n", + "\n", + "The numerical result should closely match the analytical solution if done right.\n", + "\n", + "Here is a plot of the loss function (L2) as a function of $\\vec{\\beta}$, and skeleton code for the gradient descent algorithm.\n", + "\n", + "Implement the derivative function and the model update so that the gradient descent converges to the minimum of the loss function.\n", + "\n", + "Hint: do not forget the importance of the step size!\n", + "\n", + "1. Does it give the same result as the analytical method?\n", + "\n", + "2. What happens if you change the initial value of $\\beta$ to be further away from the solution?"
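For the cell below, one standard way to write the averaged L2-loss gradient: with a design matrix $X$ that already includes a column of ones and predictions $\hat{y} = X \vec{\beta}$, the loss $L(\vec{\beta}) = \frac{1}{N} \sum_i (\hat{y}_i - y_i)^2$ has gradient $\nabla L(\vec{\beta}) = \frac{2}{N} X^T (\hat{y} - y)$, and each iteration updates $\vec{\beta} \leftarrow \vec{\beta} - \eta \, \nabla L(\vec{\beta})$. The sketch below is a generic, self-contained illustration on synthetic data (the data, step size and iteration count are made up); it is not a drop-in replacement for the `derivative_function` skeleton in the next cell.

```python
import numpy as np

rng = np.random.default_rng(0)

# Synthetic 1D regression problem: y = 2 + 3 x + noise
x = rng.normal(size=(50, 1))
y = 2 + 3 * x[:, 0] + 0.1 * rng.normal(size=50)

# Design matrix with a leading column of ones for the intercept
X = np.concatenate([np.ones((50, 1)), x], axis=1)


def l2_gradient(X, y, beta):
    # Gradient of mean((X beta - y)^2) with respect to beta
    residual = X.dot(beta) - y
    return 2 * X.T.dot(residual) / len(y)


beta = np.zeros(2)
stepsize = 0.1

for _ in range(200):
    beta = beta - stepsize * l2_gradient(X, y, beta)

print(beta)  # should be close to [2, 3]
```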
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wine_full, _ = load_wine(return_X_y=True)\n", + "wine_X = wine_full[:, np.newaxis, 9]\n", + "wine_y = wine_full[:, 0]\n", + "\n", + "# Normalization makes our life easier\n", + "wine_X = (wine_X - wine_X.mean(0)) / wine_X.std(0)\n", + "\n", + "regr = LinearRegression().fit(wine_X, wine_y)\n", + "\n", + "# Add a column of 1 to the data X\n", + "ones = np.ones((wine_X.shape[0], 1))\n", + "wine_X = np.concatenate([ones, wine_X], axis=1)\n", + "\n", + "beta = np.array((regr.intercept_, regr.coef_[0]))\n", + "\n", + "\n", + "def loss_function(y_predict, y_true):\n", + " return np.power(y_true - y_predict, 2).mean(0)\n", + "\n", + "\n", + "def derivative_function(y_predict, y_true, x):\n", + " \n", + " ###############################################\n", + " # Code here to implement the derivative\n", + "\n", + " \n", + "\n", + " ###############################################\n", + "\n", + "\n", + "\n", + "# Compute and plot the loss function at every value of b\n", + "b0 = np.linspace(-10, 50, 100)\n", + "b1 = np.linspace(-20, 20, 100)\n", + "\n", + "beta_range = np.stack(np.meshgrid(b0, b1), 1)\n", + "pred = wine_X.dot(beta_range)\n", + "loss_map = loss_function(pred, wine_y[:, np.newaxis, np.newaxis])\n", + "\n", + "# Plotting in log scale for better readability\n", + "plt.figure(figsize=(10,7))\n", + "plt.imshow(np.log10(loss_map), extent=[b0[0], b0[-1], b1[0], b1[-1]], aspect='auto', origin='lower')\n", + "plt.colorbar()\n", + "plt.xlabel('$\\\\beta_0$')\n", + "plt.ylabel('$\\\\beta_1$')\n", + "plt.title('L2 loss (log scale)')\n", + "\n", + "# Initial value for b (arbitrary)\n", + "b = np.array([0, 0])\n", + "\n", + "plt.plot(b[0], b[1], 'o', color='C3', markersize=15, label='Initial')\n", + "\n", + "\n", + "# Gradient descent algorithm\n", + "for _ in range(30):\n", + "\n", + " ##############################################################\n", + " # Update the model here!\n", + " # The choice of the stepsize is crucial to avoid oscillations\n", + " \n", + " \n", + "\n", + " ##############################################################\n", + "\n", + " # Plot the evolution\n", + " plt.plot(b[0], b[1], 'o', color='C1')\n", + "\n", + "\n", + "plt.plot(b[0], b[1], 'o', color='C2', markersize=10, label='Final')\n", + "plt.legend()\n", + "plt.show()\n", + "\n", + "print('Numerical value of beta:', b)\n", + "print('Analytical value of beta:', beta)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2-final" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/lab03/test_data.npy b/lab03/test_data.npy new file mode 100644 index 0000000..a1de817 Binary files /dev/null and b/lab03/test_data.npy differ diff --git a/lab03/testing.py b/lab03/testing.py new file mode 100644 index 0000000..949a6ab --- /dev/null +++ b/lab03/testing.py @@ -0,0 +1,87 @@ +""" +EE-311 +====== + +Lab 3: logistic regression and gradient descent +---------------------------------------- + +created by Francois Marelli on 05.03.2020 +""" + +import unittest +import numpy as np +import homework + +from sklearn.datasets import load_iris +from sklearn.linear_model 
import LogisticRegression + +import importlib + +importlib.reload(homework) + + +class Test(unittest.TestCase): + def test_model_prediction(self): + data_X, data_y = load_iris(return_X_y=True) + + mask = (data_y == 0) | (data_y == 1) + + data_X = data_X[mask] + data_y = data_y[mask] + + classifier = LogisticRegression().fit(data_X, data_y) + beta = np.concatenate((classifier.intercept_, classifier.coef_[0, :])).squeeze() + p = homework.model_prediction(data_X.copy(), beta.copy()) + p2 = classifier.predict(data_X) + + np.testing.assert_array_equal(p, p2) + + sample_X = np.zeros_like(data_X[0]) + sample_X[0] = (0.25 - beta[0]) / beta[1] + np.testing.assert_array_equal( + homework.model_prediction(sample_X[None, :], beta), 1 + ) + + def test_gradient(self): + data_X, data_y = load_iris(return_X_y=True) + + mask = (data_y == 0) | (data_y == 1) + + data_X = data_X[mask] + data_y = data_y[mask] + + data_X = data_X[:-5] + data_y = data_y[:-5] + + grad_ref = np.load("test_data.npy") + + beta_0 = np.zeros_like(grad_ref) + grad_out = homework.logistic_loss_derivative( + data_X.copy(), data_y.copy(), beta_0 + ) + + np.testing.assert_allclose(grad_out, grad_ref, atol=1e-9, rtol=1e-5) + + def test_gradient_descent(self): + data_X, data_y = load_iris(return_X_y=True) + + mask = (data_y == 0) | (data_y == 1) + + data_X = data_X[mask] + data_y = data_y[mask] + + classifier = LogisticRegression().fit(data_X, data_y) + beta = np.concatenate((classifier.intercept_, classifier.coef_[0, :])) + + beta_0 = np.zeros_like(beta) + beta_out = homework.gradient_descent( + data_X.copy(), data_y.copy(), beta_0, 1e-1, 5 + ) + + p = homework.model_prediction(data_X.copy(), beta_out) + p2 = classifier.predict(data_X) + np.testing.assert_array_equal(p, p2) + + +if __name__ == "__main__": + unittest.main()
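Independently of the test suite above, a finite-difference check is a convenient way to validate a hand-coded gradient such as `homework.logistic_loss_derivative`. The sketch below is not part of the grading; it uses a simple quadratic loss as a stand-in (an assumption made for illustration), but the same `numerical_gradient` helper can be pointed at any scalar loss function.

```python
import numpy as np


def loss(beta):
    # Stand-in loss: a quadratic bowl with its minimum at (1, -2, 3)
    target = np.array([1.0, -2.0, 3.0])
    return np.sum((beta - target) ** 2)


def analytic_gradient(beta):
    # Exact gradient of the stand-in loss: 2 * (beta - target)
    target = np.array([1.0, -2.0, 3.0])
    return 2 * (beta - target)


def numerical_gradient(f, beta, eps=1e-6):
    # Central finite differences, one coordinate at a time
    grad = np.zeros_like(beta)
    for i in range(beta.size):
        step = np.zeros_like(beta)
        step[i] = eps
        grad[i] = (f(beta + step) - f(beta - step)) / (2 * eps)
    return grad


beta = np.array([0.5, 0.5, 0.5])
print(analytic_gradient(beta))         # [-1.  5. -5.]
print(numerical_gradient(loss, beta))  # should agree to several decimal places
```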