jeudi 29 octobre 2020

How to solve Multiple Linear Regression in Python Using Gradient Descent

How to solve Multiple Linear Regression in Python Using Gradient Descent

In [1]:
from IPython.display import display
from PIL import Image
# Raw string: the backslashes of the Windows path are kept literally instead of
# relying on unrecognised escape sequences (\R, \e), which Python warns about.
path = r"D:\Regression\equation_lr_multi.png"
display(Image.open(path))
In [2]:
# Import the libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D
#from sklearn.linear_model import LinearRegression

1. Dataset

In our case, we use 200 observations relating sales to advertising investment in different media. We have 3 independent variables representing the advertising investment per medium (TV, Radio, Newspaper).

m = 200, n = 3

In [3]:
#Import Dataset

# Raw string so the backslashes in the Windows path are taken literally
# (\R and \A are not valid escape sequences and trigger a warning otherwise).
dataset = pd.read_csv(r"d:\Regression\Advertising.csv")
dataset
Out[3]:

Unnamed: 0TVRadioNewspaperSales
01230.137.869.222.1
1244.539.345.110.4
2317.245.969.39.3
34151.541.358.518.5
45180.810.858.412.9
..................
19519638.23.713.87.6
19619794.24.98.19.7
197198177.09.36.412.8
198199283.642.066.225.5
199200232.18.68.713.4

200 rows × 5 columns

In [4]:
# Feature matrix: the three advertising columns, converted to a NumPy array.
X = np.array(dataset.loc[:, ['TV', 'Radio', 'Newspaper']])

# Target vector: the 'Sales' column, converted to a 1-D NumPy array.
y = np.array(dataset.loc[:, 'Sales'])
print(y.shape)
(200,)

Observing the target values [y] against the [TV] feature alone

In [5]:
plt.scatter(X[:,0], y)
Out[5]:
<matplotlib.collections.PathCollection at 0x220c4d36bc8>

Observing the target values [y] against the [Radio] feature alone

In [6]:
plt.scatter(X[:,1], y)
Out[6]:
<matplotlib.collections.PathCollection at 0x220c4df2808>

Observing the target values [y] against the [Newspaper] feature alone

In [7]:
plt.scatter(X[:,2], y)
Out[7]:
<matplotlib.collections.PathCollection at 0x220c4e5ce08>

Observing the target values [y] against the [TV] and [Radio] features together

In [8]:
# 3-D scatter of Sales against the TV and Radio budgets
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c = 'r', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')
# plt.show must be called; the bare name only echoes the function object
plt.show()
Out[8]:
<function matplotlib.pyplot.show(*args, **kw)>
In [9]:
# 3-D scatter of Sales against the TV and Newspaper budgets
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c = 'g', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
# plt.show must be called; the bare name only echoes the function object
plt.show()
Out[9]:
<function matplotlib.pyplot.show(*args, **kw)>
In [10]:
# 3-D scatter of Sales against the Radio and Newspaper budgets
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c = 'b', marker = '^')

ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
# plt.show must be called; the bare name only echoes the function object
plt.show()
Out[10]:
<function matplotlib.pyplot.show(*args, **kw)>
In [11]:
# Rebuild X from the three feature columns and append a trailing column of ones,
# so that the last entry of theta plays the role of the intercept.
X = np.array(dataset[['TV','Radio','Newspaper']])
X = np.column_stack((X, np.ones(len(X))))
X
Out[11]:
array([[230.1,  37.8,  69.2,   1. ],
       [ 44.5,  39.3,  45.1,   1. ],
       [ 17.2,  45.9,  69.3,   1. ],
       [151.5,  41.3,  58.5,   1. ],
       [180.8,  10.8,  58.4,   1. ],
       [  8.7,  48.9,  75. ,   1. ],
       [ 57.5,  32.8,  23.5,   1. ],
       [120.2,  19.6,  11.6,   1. ],
       [  8.6,   2.1,   1. ,   1. ],
       [199.8,   2.6,  21.2,   1. ],
       [ 66.1,   5.8,  24.2,   1. ],
       [214.7,  24. ,   4. ,   1. ],
       [ 23.8,  35.1,  65.9,   1. ],
       [ 97.5,   7.6,   7.2,   1. ],
       [204.1,  32.9,  46. ,   1. ],
       [195.4,  47.7,  52.9,   1. ],
       [ 67.8,  36.6, 114. ,   1. ],
       [281.4,  39.6,  55.8,   1. ],
       [ 69.2,  20.5,  18.3,   1. ],
       [147.3,  23.9,  19.1,   1. ],
       [218.4,  27.7,  53.4,   1. ],
       [237.4,   5.1,  23.5,   1. ],
       [ 13.2,  15.9,  49.6,   1. ],
       [228.3,  16.9,  26.2,   1. ],
       [ 62.3,  12.6,  18.3,   1. ],
       [262.9,   3.5,  19.5,   1. ],
       [142.9,  29.3,  12.6,   1. ],
       [240.1,  16.7,  22.9,   1. ],
       [248.8,  27.1,  22.9,   1. ],
       [ 70.6,  16. ,  40.8,   1. ],
       [292.9,  28.3,  43.2,   1. ],
       [112.9,  17.4,  38.6,   1. ],
       [ 97.2,   1.5,  30. ,   1. ],
       [265.6,  20. ,   0.3,   1. ],
       [ 95.7,   1.4,   7.4,   1. ],
       [290.7,   4.1,   8.5,   1. ],
       [266.9,  43.8,   5. ,   1. ],
       [ 74.7,  49.4,  45.7,   1. ],
       [ 43.1,  26.7,  35.1,   1. ],
       [228. ,  37.7,  32. ,   1. ],
       [202.5,  22.3,  31.6,   1. ],
       [177. ,  33.4,  38.7,   1. ],
       [293.6,  27.7,   1.8,   1. ],
       [206.9,   8.4,  26.4,   1. ],
       [ 25.1,  25.7,  43.3,   1. ],
       [175.1,  22.5,  31.5,   1. ],
       [ 89.7,   9.9,  35.7,   1. ],
       [239.9,  41.5,  18.5,   1. ],
       [227.2,  15.8,  49.9,   1. ],
       [ 66.9,  11.7,  36.8,   1. ],
       [199.8,   3.1,  34.6,   1. ],
       [100.4,   9.6,   3.6,   1. ],
       [216.4,  41.7,  39.6,   1. ],
       [182.6,  46.2,  58.7,   1. ],
       [262.7,  28.8,  15.9,   1. ],
       [198.9,  49.4,  60. ,   1. ],
       [  7.3,  28.1,  41.4,   1. ],
       [136.2,  19.2,  16.6,   1. ],
       [210.8,  49.6,  37.7,   1. ],
       [210.7,  29.5,   9.3,   1. ],
       [ 53.5,   2. ,  21.4,   1. ],
       [261.3,  42.7,  54.7,   1. ],
       [239.3,  15.5,  27.3,   1. ],
       [102.7,  29.6,   8.4,   1. ],
       [131.1,  42.8,  28.9,   1. ],
       [ 69. ,   9.3,   0.9,   1. ],
       [ 31.5,  24.6,   2.2,   1. ],
       [139.3,  14.5,  10.2,   1. ],
       [237.4,  27.5,  11. ,   1. ],
       [216.8,  43.9,  27.2,   1. ],
       [199.1,  30.6,  38.7,   1. ],
       [109.8,  14.3,  31.7,   1. ],
       [ 26.8,  33. ,  19.3,   1. ],
       [129.4,   5.7,  31.3,   1. ],
       [213.4,  24.6,  13.1,   1. ],
       [ 16.9,  43.7,  89.4,   1. ],
       [ 27.5,   1.6,  20.7,   1. ],
       [120.5,  28.5,  14.2,   1. ],
       [  5.4,  29.9,   9.4,   1. ],
       [116. ,   7.7,  23.1,   1. ],
       [ 76.4,  26.7,  22.3,   1. ],
       [239.8,   4.1,  36.9,   1. ],
       [ 75.3,  20.3,  32.5,   1. ],
       [ 68.4,  44.5,  35.6,   1. ],
       [213.5,  43. ,  33.8,   1. ],
       [193.2,  18.4,  65.7,   1. ],
       [ 76.3,  27.5,  16. ,   1. ],
       [110.7,  40.6,  63.2,   1. ],
       [ 88.3,  25.5,  73.4,   1. ],
       [109.8,  47.8,  51.4,   1. ],
       [134.3,   4.9,   9.3,   1. ],
       [ 28.6,   1.5,  33. ,   1. ],
       [217.7,  33.5,  59. ,   1. ],
       [250.9,  36.5,  72.3,   1. ],
       [107.4,  14. ,  10.9,   1. ],
       [163.3,  31.6,  52.9,   1. ],
       [197.6,   3.5,   5.9,   1. ],
       [184.9,  21. ,  22. ,   1. ],
       [289.7,  42.3,  51.2,   1. ],
       [135.2,  41.7,  45.9,   1. ],
       [222.4,   4.3,  49.8,   1. ],
       [296.4,  36.3, 100.9,   1. ],
       [280.2,  10.1,  21.4,   1. ],
       [187.9,  17.2,  17.9,   1. ],
       [238.2,  34.3,   5.3,   1. ],
       [137.9,  46.4,  59. ,   1. ],
       [ 25. ,  11. ,  29.7,   1. ],
       [ 90.4,   0.3,  23.2,   1. ],
       [ 13.1,   0.4,  25.6,   1. ],
       [255.4,  26.9,   5.5,   1. ],
       [225.8,   8.2,  56.5,   1. ],
       [241.7,  38. ,  23.2,   1. ],
       [175.7,  15.4,   2.4,   1. ],
       [209.6,  20.6,  10.7,   1. ],
       [ 78.2,  46.8,  34.5,   1. ],
       [ 75.1,  35. ,  52.7,   1. ],
       [139.2,  14.3,  25.6,   1. ],
       [ 76.4,   0.8,  14.8,   1. ],
       [125.7,  36.9,  79.2,   1. ],
       [ 19.4,  16. ,  22.3,   1. ],
       [141.3,  26.8,  46.2,   1. ],
       [ 18.8,  21.7,  50.4,   1. ],
       [224. ,   2.4,  15.6,   1. ],
       [123.1,  34.6,  12.4,   1. ],
       [229.5,  32.3,  74.2,   1. ],
       [ 87.2,  11.8,  25.9,   1. ],
       [  7.8,  38.9,  50.6,   1. ],
       [ 80.2,   0. ,   9.2,   1. ],
       [220.3,  49. ,   3.2,   1. ],
       [ 59.6,  12. ,  43.1,   1. ],
       [  0.7,  39.6,   8.7,   1. ],
       [265.2,   2.9,  43. ,   1. ],
       [  8.4,  27.2,   2.1,   1. ],
       [219.8,  33.5,  45.1,   1. ],
       [ 36.9,  38.6,  65.6,   1. ],
       [ 48.3,  47. ,   8.5,   1. ],
       [ 25.6,  39. ,   9.3,   1. ],
       [273.7,  28.9,  59.7,   1. ],
       [ 43. ,  25.9,  20.5,   1. ],
       [184.9,  43.9,   1.7,   1. ],
       [ 73.4,  17. ,  12.9,   1. ],
       [193.7,  35.4,  75.6,   1. ],
       [220.5,  33.2,  37.9,   1. ],
       [104.6,   5.7,  34.4,   1. ],
       [ 96.2,  14.8,  38.9,   1. ],
       [140.3,   1.9,   9. ,   1. ],
       [240.1,   7.3,   8.7,   1. ],
       [243.2,  49. ,  44.3,   1. ],
       [ 38. ,  40.3,  11.9,   1. ],
       [ 44.7,  25.8,  20.6,   1. ],
       [280.7,  13.9,  37. ,   1. ],
       [121. ,   8.4,  48.7,   1. ],
       [197.6,  23.3,  14.2,   1. ],
       [171.3,  39.7,  37.7,   1. ],
       [187.8,  21.1,   9.5,   1. ],
       [  4.1,  11.6,   5.7,   1. ],
       [ 93.9,  43.5,  50.5,   1. ],
       [149.8,   1.3,  24.3,   1. ],
       [ 11.7,  36.9,  45.2,   1. ],
       [131.7,  18.4,  34.6,   1. ],
       [172.5,  18.1,  30.7,   1. ],
       [ 85.7,  35.8,  49.3,   1. ],
       [188.4,  18.1,  25.6,   1. ],
       [163.5,  36.8,   7.4,   1. ],
       [117.2,  14.7,   5.4,   1. ],
       [234.5,   3.4,  84.8,   1. ],
       [ 17.9,  37.6,  21.6,   1. ],
       [206.8,   5.2,  19.4,   1. ],
       [215.4,  23.6,  57.6,   1. ],
       [284.3,  10.6,   6.4,   1. ],
       [ 50. ,  11.6,  18.4,   1. ],
       [164.5,  20.9,  47.4,   1. ],
       [ 19.6,  20.1,  17. ,   1. ],
       [168.4,   7.1,  12.8,   1. ],
       [222.4,   3.4,  13.1,   1. ],
       [276.9,  48.9,  41.8,   1. ],
       [248.4,  30.2,  20.3,   1. ],
       [170.2,   7.8,  35.2,   1. ],
       [276.7,   2.3,  23.7,   1. ],
       [165.6,  10. ,  17.6,   1. ],
       [156.6,   2.6,   8.3,   1. ],
       [218.5,   5.4,  27.4,   1. ],
       [ 56.2,   5.7,  29.7,   1. ],
       [287.6,  43. ,  71.8,   1. ],
       [253.8,  21.3,  30. ,   1. ],
       [205. ,  45.1,  19.6,   1. ],
       [139.5,   2.1,  26.6,   1. ],
       [191.1,  28.7,  18.2,   1. ],
       [286. ,  13.9,   3.7,   1. ],
       [ 18.7,  12.1,  23.4,   1. ],
       [ 39.5,  41.1,   5.8,   1. ],
       [ 75.5,  10.8,   6. ,   1. ],
       [ 17.2,   4.1,  31.6,   1. ],
       [166.8,  42. ,   3.6,   1. ],
       [149.7,  35.6,   6. ,   1. ],
       [ 38.2,   3.7,  13.8,   1. ],
       [ 94.2,   4.9,   8.1,   1. ],
       [177. ,   9.3,   6.4,   1. ],
       [283.6,  42. ,  66.2,   1. ],
       [232.1,   8.6,   8.7,   1. ]])
In [12]:
# We are checking that the dimension of X is m x (n + 1) ; m = 200, n = 3
print(X.shape)
(200, 4)

2. Model

In [13]:
# Raw string keeps the Windows backslashes literal (\R, \m are not valid escapes)
path = r"D:\Regression\model_ftheta_2.png"
display(Image.open(path))
In [14]:
# Initialise the parameter vector theta with samples from a standard normal
# distribution: one row per column of X, so it stays compatible with X.dot(theta).
# No random seed is set here, so the values differ from one run to the next.
theta = np.random.randn(X.shape[1], 1)
theta
Out[14]:
array([[-0.80932779],
       [ 2.11885438],
       [-1.09322826],
       [-0.35867777]])
In [15]:
# Dimension of theta is (n + 1) x 1; n = 3
print(theta.shape)
(4, 1)
In [16]:
def model(X, theta):
    """Evaluate the linear model F = X . theta.

    Parameters
    ----------
    X : array-like exposing a ``dot`` method (e.g. a NumPy array)
        Design matrix, or a single row of it.
    theta : array
        Parameter vector multiplied on the right of X.

    Returns
    -------
    The result of ``X.dot(theta)``.
    """
    predictions = X.dot(theta)
    return predictions
In [17]:
# We are checking that dimension of X.theta is m x 1; m = 200
F = model(X, theta)
print(F.shape)
(200, 1)

3. Cost Function

In [18]:
# Raw string keeps the Windows backslashes literal (\R, \c are not valid escapes)
path = r"D:\Regression\cost_function_2.png"
display(Image.open(path))
In [19]:
print(y.shape)  #We can notice that, we don't have a second dimension of y
(200,)
In [20]:
y = y.reshape(y.shape[0], 1)  # We can use reshape function to add a second dimension to y vector. 
                              #We used it to be able to calculate X.theta - y
print(y.shape)
(200, 1)
In [21]:
G =  model(X, theta) - y
In [22]:
print(G.shape)
(200, 1)
In [23]:
def cost_function(X, y, theta):
    """Return the cost J(theta) = 1/(2m) * sum((model(X, theta) - y)**2).

    ``m`` is taken as ``len(y)``; the sum runs over every element of the
    squared difference between the model output and ``y``.
    """
    m = len(y)
    residuals = model(X, theta) - y
    squared_error_sum = np.sum(residuals**2)
    return 1/(2*m) * squared_error_sum
In [24]:
n = cost_function (X, y, theta)
n
Out[24]:
10018.961471353592

4. Gradient and Gradient Descent

In [25]:
# Raw string keeps the Windows backslashes literal (\R, \g are not valid escapes)
path = r"D:\Regression\gradients.png"
display(Image.open(path))
In [26]:
def grad(X, y, theta):
    """Return the gradient of the cost: 1/m * X^T . (model(X, theta) - y).

    ``m`` is taken as ``len(y)``.
    """
    m = len(y)
    residuals = model(X, theta) - y
    return 1/m * X.T.dot(residuals)
In [27]:
print(X.T.shape)  #Dimension of X.T must be (n+ 1) X m
(4, 200)
In [28]:
z = grad(X, y, theta)  # Dimension of grad(X, y, theta) must be (n +1) x 1
print(z.shape)
(4, 1)
In [29]:
# Raw string keeps the Windows backslashes literal (\R, \g are not valid escapes)
path = r"D:\Regression\gradient_descent_2.png"
display(Image.open(path))
In [30]:
def grandient_descent(X, y, theta, learning_rate, n_iterations):
    """Run ``n_iterations`` steps of gradient descent on theta.

    At each step theta is replaced by ``theta - learning_rate * grad(X, y, theta)``
    and the cost of the updated theta is recorded.

    Returns
    -------
    (theta, cost_history) : the final theta and an array of length
    ``n_iterations`` holding the cost after each update.
    """
    cost_history = np.zeros(n_iterations)
    for step in range(n_iterations):
        gradient = grad(X, y, theta)
        theta = theta - learning_rate * gradient
        # cost is measured after the update of this step
        cost_history[step] = cost_function(X, y, theta)
    return theta, cost_history

5. Machine Learning

In [31]:
n_it = 4000
In [32]:
theta_final, cost_history = grandient_descent(X, y, theta, learning_rate = 0.00001, n_iterations = n_it)
In [33]:
plt.plot(range(n_it), cost_history)
Out[33]:
[<matplotlib.lines.Line2D at 0x220c54b8f48>]
In [34]:
# We can see from the cost curve that the model has converged within 4000 iterations
In [35]:
predictions = model(X, theta_final)
In [36]:
my_pred = model(np.array([151.5,41.3,58.5,1]), theta_final)

my_pred
Out[36]:
array([18.40087424])
In [37]:
# Raw string keeps the Windows backslashes literal (\R, \d are not valid escapes)
path = r"D:\Regression\determination_coef.png"
display(Image.open(path))
In [38]:
# Coefficient of determination (R^2): 1 minus the ratio between the residual
# sum of squares and the total sum of squares of y around its mean.

def determination_rate(y, pred):
    """Return the coefficient of determination R^2 = 1 - u/v.

    ``u`` is the sum of squared differences between ``y`` and ``pred``;
    ``v`` is the sum of squared differences between ``y`` and its mean.

    Parameters
    ----------
    y : array-like
        Observed target values.
    pred : array-like
        Predicted values.

    Notes
    -----
    When ``y`` and ``pred`` hold the same number of elements but have
    different shapes (e.g. ``(m,)`` against ``(m, 1)``), both are flattened
    first. Otherwise ``y - pred`` would broadcast to an ``(m, m)`` matrix and
    the result would be silently wrong. Inputs of different sizes keep
    NumPy's usual broadcasting rules. If ``y`` is constant, ``v`` is zero and
    the division is undefined.
    """
    y = np.asarray(y)
    pred = np.asarray(pred)
    if y.size == pred.size:
        # pair values element by element regardless of row/column layout
        y = y.ravel()
        pred = pred.ravel()
    u = ((y - pred)**2).sum()
    v = ((y - y.mean())**2).sum()
    return 1 - u/v
In [39]:
determination_rate(y, predictions)
Out[39]:
0.8388215305394597
In [40]:
# Observed sales (blue) and model predictions (red) over the TV / Radio plane
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')

ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c = 'b', marker = '^')
ax.scatter(dataset["TV"], dataset["Radio"], predictions, c = 'r', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')

plt.show()
In [41]:
# Observed sales (green) and model predictions (red) over the TV / Newspaper plane
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')

ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c = 'g', marker = '^')
ax.scatter(dataset["TV"], dataset["Newspaper"], predictions, c = 'r', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')

plt.show()
In [42]:
# Observed sales (yellow) and model predictions (red) over the Radio / Newspaper plane
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')

ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c = 'y', marker = '^')
ax.scatter(dataset["Radio"], dataset["Newspaper"], predictions, c = 'r', marker = '^')

ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')

plt.show()
In [ ]:
 

How to fix errors : -         ORA-38760: This database instance failed to turn on flashback database -         ORA-38780: Restore poin...