In [1]:
from IPython.display import display
from PIL import Image
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\e".
path = r"D:\Regression\equation_lr_multi.png"
display(Image.open(path))
In [2]:
# Import the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
#from sklearn.linear_model import LinearRegression
In [3]:
# Load the dataset from the local CSV file.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\A".
dataset = pd.read_csv(r"d:\Regression\Advertising.csv")
dataset
Out[3]:
In [4]:
# Feature matrix (TV, Radio, Newspaper columns) and target vector (Sales column)
# extracted from the DataFrame as NumPy arrays.
feature_columns = ['TV', 'Radio', 'Newspaper']
X = np.array(dataset[feature_columns])
y = np.array(dataset['Sales'])
print(y.shape)
In [5]:
# Sales against the first feature column of X (TV); labelled so the figure stands alone.
plt.scatter(X[:, 0], y)
plt.xlabel('TV')
plt.ylabel('Sales')
plt.show()
Out[5]:
In [6]:
# Sales against the second feature column of X (Radio); labelled so the figure stands alone.
plt.scatter(X[:, 1], y)
plt.xlabel('Radio')
plt.ylabel('Sales')
plt.show()
Out[6]:
In [7]:
# Sales against the third feature column of X (Newspaper); labelled so the figure stands alone.
plt.scatter(X[:, 2], y)
plt.xlabel('Newspaper')
plt.ylabel('Sales')
plt.show()
Out[7]:
In [8]:
# 3D scatter of the Sales column against the TV and Radio columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c='r', marker='^')
ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')
# Must be called: a bare `plt.show` only references the function and does nothing.
plt.show()
Out[8]:
In [9]:
# 3D scatter of the Sales column against the TV and Newspaper columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c='g', marker='^')
ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
# Must be called: a bare `plt.show` only references the function and does nothing.
plt.show()
Out[9]:
In [10]:
# 3D scatter of the Sales column against the Radio and Newspaper columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c='b', marker='^')
ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
# Must be called: a bare `plt.show` only references the function and does nothing.
plt.show()
Out[10]:
In [11]:
# Rebuild X from the DataFrame and append a column of ones as its last column,
# so that the last component of theta acts as the intercept term.
X = np.c_[
    np.array(dataset[['TV', 'Radio', 'Newspaper']]),
    np.ones(len(dataset)),
]
X
Out[11]:
In [12]:
# Sanity check: X should be m x (n + 1); here m = 200 observations and n = 3 features
# (the extra column is the column of ones appended to X).
print(X.shape)
In [13]:
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\m".
path = r"D:\Regression\model_ftheta_2.png"
display(Image.open(path))
In [14]:
# Initialise the parameter vector theta with standard-normal random values.
# A seeded generator makes the starting point (and therefore every result computed
# from it) reproducible across kernel restarts.
# theta gets one row per column of X, so the column of ones is covered as well.
rng = np.random.default_rng(0)
theta = rng.standard_normal((X.shape[1], 1))
theta
Out[14]:
In [15]:
# Sanity check: theta should be (n + 1) x 1, with n = 3 features
print(theta.shape)
In [16]:
def model(X, theta):
    """Linear model: return the product X . theta.

    In this notebook X is the m x (n + 1) design matrix (features plus a
    trailing column of ones) and theta is the (n + 1) x 1 parameter vector,
    so the result is the m x 1 vector of predictions. A single 1-D sample
    can also be passed as X.
    """
    predictions = X.dot(theta)
    return predictions
In [17]:
# Sanity check: the model output X.theta should be m x 1 (m = 200 observations)
F = model(X=X, theta=theta)
print(F.shape)
In [18]:
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\c".
path = r"D:\Regression\cost_function_2.png"
display(Image.open(path))
In [19]:
print(y.shape)  # Note that y is still 1-D here: its shape has no second dimension
In [20]:
# Give y an explicit second dimension (one column) so that X.theta - y can be
# computed between two column vectors of the same shape.
y = y.reshape((len(y), 1))
print(y.shape)
In [21]:
# Residuals of the current model: predictions minus observed values
G = model(X=X, theta=theta) - y
In [22]:
# Shape of the residual vector G = X.theta - y (expected to be m x 1)
print(G.shape)
In [23]:
def cost_function(X, y, theta):
    """Return the cost 1/(2m) * sum((X.theta - y)**2) as a scalar.

    m is the number of entries in y; the predictions come from model(X, theta).
    """
    m = len(y)
    factor = 1 / (2 * m)
    squared_errors = (model(X, theta) - y) ** 2
    return factor * np.sum(squared_errors)
In [24]:
# Cost of the model with the initial (not yet fitted) theta
n = cost_function(X=X, y=y, theta=theta)
n
Out[24]:
In [25]:
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\g".
path = r"D:\Regression\gradients.png"
display(Image.open(path))
In [26]:
def grad(X, y, theta):
    """Return the gradient of the cost with respect to theta.

    Computed as (1/m) * X^T . (X.theta - y), where m is the number of entries
    in y; the result has one row per column of X.
    """
    m = len(y)
    scale = 1 / m
    residuals = model(X, theta) - y
    return scale * X.T.dot(residuals)
In [27]:
print(X.T.shape)  # The shape of X.T (the transpose of X) must be (n + 1) x m
In [28]:
# Sanity check: the gradient must have the same shape as theta, (n + 1) x 1
z = grad(X=X, y=y, theta=theta)
print(z.shape)
In [29]:
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\g".
path = r"D:\Regression\gradient_descent_2.png"
display(Image.open(path))
In [30]:
def grandient_descent(X, y, theta, learning_rate, n_iterations):
    """Run batch gradient descent and return the fitted parameters and cost history.

    Starting from `theta`, performs `n_iterations` updates of the form
    theta <- theta - learning_rate * grad(X, y, theta) and records the cost
    after each update.

    Returns a tuple (theta, cost_history) where cost_history is a 1-D array of
    length n_iterations. The `theta` array passed in is not modified.
    """
    costs = np.zeros(n_iterations)
    current = theta
    for step in range(n_iterations):
        current = current - learning_rate * grad(X, y, current)
        costs[step] = cost_function(X, y, current)
    return current, costs
In [31]:
# Number of gradient-descent iterations to run
n_it = 4000
In [32]:
# Fit the model: run gradient descent from the initial theta and keep both the
# final parameters and the cost recorded at every iteration.
theta_final, cost_history = grandient_descent(
    X,
    y,
    theta,
    learning_rate=1e-5,
    n_iterations=n_it,
)
In [33]:
# Learning curve: cost recorded after each gradient-descent update.
plt.plot(range(n_it), cost_history)
plt.xlabel('Iteration')
plt.ylabel('Cost')
plt.show()
Out[33]:
In [34]:
# The cost curve shows that the model has converged by around 4000 iterations
In [35]:
# Predictions of the fitted model for every row of X
predictions = model(X=X, theta=theta_final)
In [36]:
# Prediction for one hand-written sample, in the column order of X:
# TV = 151.5, Radio = 41.3, Newspaper = 58.5, followed by the constant 1.
sample = np.array([151.5, 41.3, 58.5, 1])
my_pred = model(sample, theta_final)
my_pred
Out[36]:
In [37]:
# Display the illustration stored at this local path.
# Raw string: the backslashes of the Windows path are kept literally instead of
# being parsed as (invalid) escape sequences such as "\R" and "\d".
path = r"D:\Regression\determination_coef.png"
display(Image.open(path))
In [38]:
def determination_rate(y, pred):
    """Return the coefficient of determination R^2 = 1 - SS_res / SS_tot.

    SS_res is the sum of squared differences between y and pred; SS_tot is the
    sum of squared differences between y and its mean. The result measures the
    proportion of the variance of y that is explained by the predictions.
    """
    residual_ss = ((y - pred) ** 2).sum()
    total_ss = ((y - y.mean()) ** 2).sum()
    ratio = residual_ss / total_ss
    return 1 - ratio
In [39]:
# Coefficient of determination of the fitted model on the data it was fitted to
r_squared = determination_rate(y, predictions)
r_squared
Out[39]:
In [40]:
# Observed Sales (blue) versus the model's predictions (red) over the TV and Radio columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')
ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c='b', marker='^')
# predictions is an (m, 1) column vector; flatten it so it has the same 1-D shape
# as the columns it is paired with, point for point.
ax.scatter(dataset["TV"], dataset["Radio"], predictions.ravel(), c='r', marker='^')
plt.show()
In [41]:
# Observed Sales (green) versus the model's predictions (red) over the TV and Newspaper columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c='g', marker='^')
# predictions is an (m, 1) column vector; flatten it so it has the same 1-D shape
# as the columns it is paired with, point for point.
ax.scatter(dataset["TV"], dataset["Newspaper"], predictions.ravel(), c='r', marker='^')
plt.show()
In [42]:
# Observed Sales (yellow) versus the model's predictions (red) over the Radio and Newspaper columns.
graph = plt.figure()
ax = graph.add_subplot(111, projection='3d')
ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c='y', marker='^')
# predictions is an (m, 1) column vector; flatten it so it has the same 1-D shape
# as the columns it is paired with, point for point.
ax.scatter(dataset["Radio"], dataset["Newspaper"], predictions.ravel(), c='r', marker='^')
plt.show()
In [ ]:
  
