Thursday, October 29, 2020

How to solve Multiple Linear Regression in Python Using Gradient Descent

In [1]:
from IPython.display import display
from PIL import Image
path="D:\Regression\equation_lr_multi.png"
display(Image.open(path))
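In text form, the model implemented in this notebook is:

f(x) = theta_1 * TV + theta_2 * Radio + theta_3 * Newspaper + theta_4

where theta_4 is the intercept (bias); it appears last to match the column of ones that will be appended to X below.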
In [2]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from mpl_toolkits.mplot3d import Axes3D
#from sklearn.linear_model import LinearRegression

1. Dataset

In our case, we use 200 observations relating sales to advertising investment in different media. We have 3 independent variables representing the advertising spend in each medium (TV, Radio, Newspaper).

m = 200, n = 3

In [3]:
#Import Dataset

dataset = pd.read_csv("d:\Regression\Advertising.csv")
dataset
Out[3]:

     Unnamed: 0     TV  Radio  Newspaper  Sales
0             1  230.1   37.8       69.2   22.1
1             2   44.5   39.3       45.1   10.4
2             3   17.2   45.9       69.3    9.3
3             4  151.5   41.3       58.5   18.5
4             5  180.8   10.8       58.4   12.9
..          ...    ...    ...        ...    ...
195         196   38.2    3.7       13.8    7.6
196         197   94.2    4.9        8.1    9.7
197         198  177.0    9.3        6.4   12.8
198         199  283.6   42.0       66.2   25.5
199         200  232.1    8.6        8.7   13.4

200 rows × 5 columns
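Before modelling, a quick optional check (a minimal sketch, not part of the original notebook) is to look at the scale of each column, since the three media budgets cover quite different ranges:

# Optional: summary statistics of the advertising budgets and sales
dataset[['TV','Radio','Newspaper','Sales']].describe()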

In [4]:
X = np.array(dataset[['TV','Radio','Newspaper']])

y = np.array(dataset['Sales'])
print(y.shape)
(200,)

Observing the target variable [y] against the [TV] feature alone

In [5]:
plt.scatter(X[:,0], y)
Out[5]:
<matplotlib.collections.PathCollection at 0x220c4d36bc8>

Observing the target variable [y] against the [Radio] feature alone

In [6]:
plt.scatter(X[:,1], y)
Out[6]:
<matplotlib.collections.PathCollection at 0x220c4df2808>

Observing the target variable [y] against the [Newspaper] feature alone

In [7]:
plt.scatter(X[:,2], y)
Out[7]:
<matplotlib.collections.PathCollection at 0x220c4e5ce08>

Observing the target variable [y] against the [TV] and [Radio] features

In [8]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c = 'r', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')
plt.show()
In [9]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c = 'g', marker = '^')

ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
plt.show()
In [10]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')


ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c = 'b', marker = '^')

ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')
plt.show()
In [11]:
# Add a column of ones to the X matrix (for the bias/intercept term)
X = np.array(dataset[['TV','Radio','Newspaper']])
X = np.c_[X, np.ones(X.shape[0])]
X
Out[11]:
array([[230.1,  37.8,  69.2,   1. ],
       [ 44.5,  39.3,  45.1,   1. ],
       [ 17.2,  45.9,  69.3,   1. ],
       [151.5,  41.3,  58.5,   1. ],
       [180.8,  10.8,  58.4,   1. ],
       [  8.7,  48.9,  75. ,   1. ],
       [ 57.5,  32.8,  23.5,   1. ],
       [120.2,  19.6,  11.6,   1. ],
       [  8.6,   2.1,   1. ,   1. ],
       [199.8,   2.6,  21.2,   1. ],
       [ 66.1,   5.8,  24.2,   1. ],
       [214.7,  24. ,   4. ,   1. ],
       [ 23.8,  35.1,  65.9,   1. ],
       [ 97.5,   7.6,   7.2,   1. ],
       [204.1,  32.9,  46. ,   1. ],
       [195.4,  47.7,  52.9,   1. ],
       [ 67.8,  36.6, 114. ,   1. ],
       [281.4,  39.6,  55.8,   1. ],
       [ 69.2,  20.5,  18.3,   1. ],
       [147.3,  23.9,  19.1,   1. ],
       [218.4,  27.7,  53.4,   1. ],
       [237.4,   5.1,  23.5,   1. ],
       [ 13.2,  15.9,  49.6,   1. ],
       [228.3,  16.9,  26.2,   1. ],
       [ 62.3,  12.6,  18.3,   1. ],
       [262.9,   3.5,  19.5,   1. ],
       [142.9,  29.3,  12.6,   1. ],
       [240.1,  16.7,  22.9,   1. ],
       [248.8,  27.1,  22.9,   1. ],
       [ 70.6,  16. ,  40.8,   1. ],
       [292.9,  28.3,  43.2,   1. ],
       [112.9,  17.4,  38.6,   1. ],
       [ 97.2,   1.5,  30. ,   1. ],
       [265.6,  20. ,   0.3,   1. ],
       [ 95.7,   1.4,   7.4,   1. ],
       [290.7,   4.1,   8.5,   1. ],
       [266.9,  43.8,   5. ,   1. ],
       [ 74.7,  49.4,  45.7,   1. ],
       [ 43.1,  26.7,  35.1,   1. ],
       [228. ,  37.7,  32. ,   1. ],
       [202.5,  22.3,  31.6,   1. ],
       [177. ,  33.4,  38.7,   1. ],
       [293.6,  27.7,   1.8,   1. ],
       [206.9,   8.4,  26.4,   1. ],
       [ 25.1,  25.7,  43.3,   1. ],
       [175.1,  22.5,  31.5,   1. ],
       [ 89.7,   9.9,  35.7,   1. ],
       [239.9,  41.5,  18.5,   1. ],
       [227.2,  15.8,  49.9,   1. ],
       [ 66.9,  11.7,  36.8,   1. ],
       [199.8,   3.1,  34.6,   1. ],
       [100.4,   9.6,   3.6,   1. ],
       [216.4,  41.7,  39.6,   1. ],
       [182.6,  46.2,  58.7,   1. ],
       [262.7,  28.8,  15.9,   1. ],
       [198.9,  49.4,  60. ,   1. ],
       [  7.3,  28.1,  41.4,   1. ],
       [136.2,  19.2,  16.6,   1. ],
       [210.8,  49.6,  37.7,   1. ],
       [210.7,  29.5,   9.3,   1. ],
       [ 53.5,   2. ,  21.4,   1. ],
       [261.3,  42.7,  54.7,   1. ],
       [239.3,  15.5,  27.3,   1. ],
       [102.7,  29.6,   8.4,   1. ],
       [131.1,  42.8,  28.9,   1. ],
       [ 69. ,   9.3,   0.9,   1. ],
       [ 31.5,  24.6,   2.2,   1. ],
       [139.3,  14.5,  10.2,   1. ],
       [237.4,  27.5,  11. ,   1. ],
       [216.8,  43.9,  27.2,   1. ],
       [199.1,  30.6,  38.7,   1. ],
       [109.8,  14.3,  31.7,   1. ],
       [ 26.8,  33. ,  19.3,   1. ],
       [129.4,   5.7,  31.3,   1. ],
       [213.4,  24.6,  13.1,   1. ],
       [ 16.9,  43.7,  89.4,   1. ],
       [ 27.5,   1.6,  20.7,   1. ],
       [120.5,  28.5,  14.2,   1. ],
       [  5.4,  29.9,   9.4,   1. ],
       [116. ,   7.7,  23.1,   1. ],
       [ 76.4,  26.7,  22.3,   1. ],
       [239.8,   4.1,  36.9,   1. ],
       [ 75.3,  20.3,  32.5,   1. ],
       [ 68.4,  44.5,  35.6,   1. ],
       [213.5,  43. ,  33.8,   1. ],
       [193.2,  18.4,  65.7,   1. ],
       [ 76.3,  27.5,  16. ,   1. ],
       [110.7,  40.6,  63.2,   1. ],
       [ 88.3,  25.5,  73.4,   1. ],
       [109.8,  47.8,  51.4,   1. ],
       [134.3,   4.9,   9.3,   1. ],
       [ 28.6,   1.5,  33. ,   1. ],
       [217.7,  33.5,  59. ,   1. ],
       [250.9,  36.5,  72.3,   1. ],
       [107.4,  14. ,  10.9,   1. ],
       [163.3,  31.6,  52.9,   1. ],
       [197.6,   3.5,   5.9,   1. ],
       [184.9,  21. ,  22. ,   1. ],
       [289.7,  42.3,  51.2,   1. ],
       [135.2,  41.7,  45.9,   1. ],
       [222.4,   4.3,  49.8,   1. ],
       [296.4,  36.3, 100.9,   1. ],
       [280.2,  10.1,  21.4,   1. ],
       [187.9,  17.2,  17.9,   1. ],
       [238.2,  34.3,   5.3,   1. ],
       [137.9,  46.4,  59. ,   1. ],
       [ 25. ,  11. ,  29.7,   1. ],
       [ 90.4,   0.3,  23.2,   1. ],
       [ 13.1,   0.4,  25.6,   1. ],
       [255.4,  26.9,   5.5,   1. ],
       [225.8,   8.2,  56.5,   1. ],
       [241.7,  38. ,  23.2,   1. ],
       [175.7,  15.4,   2.4,   1. ],
       [209.6,  20.6,  10.7,   1. ],
       [ 78.2,  46.8,  34.5,   1. ],
       [ 75.1,  35. ,  52.7,   1. ],
       [139.2,  14.3,  25.6,   1. ],
       [ 76.4,   0.8,  14.8,   1. ],
       [125.7,  36.9,  79.2,   1. ],
       [ 19.4,  16. ,  22.3,   1. ],
       [141.3,  26.8,  46.2,   1. ],
       [ 18.8,  21.7,  50.4,   1. ],
       [224. ,   2.4,  15.6,   1. ],
       [123.1,  34.6,  12.4,   1. ],
       [229.5,  32.3,  74.2,   1. ],
       [ 87.2,  11.8,  25.9,   1. ],
       [  7.8,  38.9,  50.6,   1. ],
       [ 80.2,   0. ,   9.2,   1. ],
       [220.3,  49. ,   3.2,   1. ],
       [ 59.6,  12. ,  43.1,   1. ],
       [  0.7,  39.6,   8.7,   1. ],
       [265.2,   2.9,  43. ,   1. ],
       [  8.4,  27.2,   2.1,   1. ],
       [219.8,  33.5,  45.1,   1. ],
       [ 36.9,  38.6,  65.6,   1. ],
       [ 48.3,  47. ,   8.5,   1. ],
       [ 25.6,  39. ,   9.3,   1. ],
       [273.7,  28.9,  59.7,   1. ],
       [ 43. ,  25.9,  20.5,   1. ],
       [184.9,  43.9,   1.7,   1. ],
       [ 73.4,  17. ,  12.9,   1. ],
       [193.7,  35.4,  75.6,   1. ],
       [220.5,  33.2,  37.9,   1. ],
       [104.6,   5.7,  34.4,   1. ],
       [ 96.2,  14.8,  38.9,   1. ],
       [140.3,   1.9,   9. ,   1. ],
       [240.1,   7.3,   8.7,   1. ],
       [243.2,  49. ,  44.3,   1. ],
       [ 38. ,  40.3,  11.9,   1. ],
       [ 44.7,  25.8,  20.6,   1. ],
       [280.7,  13.9,  37. ,   1. ],
       [121. ,   8.4,  48.7,   1. ],
       [197.6,  23.3,  14.2,   1. ],
       [171.3,  39.7,  37.7,   1. ],
       [187.8,  21.1,   9.5,   1. ],
       [  4.1,  11.6,   5.7,   1. ],
       [ 93.9,  43.5,  50.5,   1. ],
       [149.8,   1.3,  24.3,   1. ],
       [ 11.7,  36.9,  45.2,   1. ],
       [131.7,  18.4,  34.6,   1. ],
       [172.5,  18.1,  30.7,   1. ],
       [ 85.7,  35.8,  49.3,   1. ],
       [188.4,  18.1,  25.6,   1. ],
       [163.5,  36.8,   7.4,   1. ],
       [117.2,  14.7,   5.4,   1. ],
       [234.5,   3.4,  84.8,   1. ],
       [ 17.9,  37.6,  21.6,   1. ],
       [206.8,   5.2,  19.4,   1. ],
       [215.4,  23.6,  57.6,   1. ],
       [284.3,  10.6,   6.4,   1. ],
       [ 50. ,  11.6,  18.4,   1. ],
       [164.5,  20.9,  47.4,   1. ],
       [ 19.6,  20.1,  17. ,   1. ],
       [168.4,   7.1,  12.8,   1. ],
       [222.4,   3.4,  13.1,   1. ],
       [276.9,  48.9,  41.8,   1. ],
       [248.4,  30.2,  20.3,   1. ],
       [170.2,   7.8,  35.2,   1. ],
       [276.7,   2.3,  23.7,   1. ],
       [165.6,  10. ,  17.6,   1. ],
       [156.6,   2.6,   8.3,   1. ],
       [218.5,   5.4,  27.4,   1. ],
       [ 56.2,   5.7,  29.7,   1. ],
       [287.6,  43. ,  71.8,   1. ],
       [253.8,  21.3,  30. ,   1. ],
       [205. ,  45.1,  19.6,   1. ],
       [139.5,   2.1,  26.6,   1. ],
       [191.1,  28.7,  18.2,   1. ],
       [286. ,  13.9,   3.7,   1. ],
       [ 18.7,  12.1,  23.4,   1. ],
       [ 39.5,  41.1,   5.8,   1. ],
       [ 75.5,  10.8,   6. ,   1. ],
       [ 17.2,   4.1,  31.6,   1. ],
       [166.8,  42. ,   3.6,   1. ],
       [149.7,  35.6,   6. ,   1. ],
       [ 38.2,   3.7,  13.8,   1. ],
       [ 94.2,   4.9,   8.1,   1. ],
       [177. ,   9.3,   6.4,   1. ],
       [283.6,  42. ,  66.2,   1. ],
       [232.1,   8.6,   8.7,   1. ]])
In [12]:
# We are checking that the dimension of X is m x (n + 1) ; m = 200, n = 3
print(X.shape)
(200, 4)

2. Model

In [13]:
path="D:\Regression\model_ftheta_2.png"
display(Image.open(path))
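In matrix form, the model computed by the model(X, theta) function below is F = X.theta, where X is the m x (n+1) design matrix built above (including the column of ones) and theta is the (n+1) x 1 parameter vector, so F has dimension m x 1.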
In [14]:
# Initialize the theta vector with random values
theta = np.random.randn(4,1)
theta
Out[14]:
array([[-0.80932779],
       [ 2.11885438],
       [-1.09322826],
       [-0.35867777]])
In [15]:
# Dimension of theta is (n + 1) x 1; n = 3
print(theta.shape)
(4, 1)
In [16]:
def model(X, theta):
    return X.dot(theta)
In [17]:
# We are checking that the dimension of X.theta is m x 1; m = 200
F = model(X, theta)
print(F.shape)
(200, 1)

3. Cost Function

In [18]:
path="D:\Regression\cost_function_2.png"
display(Image.open(path))
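In text form, the cost implemented below is the (halved) mean squared error:

J(theta) = 1/(2m) * sum((X.theta - y)^2)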
In [19]:
print(y.shape)  # Note that y does not have a second dimension
(200,)
In [20]:
y = y.reshape(y.shape[0], 1)  # Use reshape to add a second dimension to y
                              # so that X.theta - y can be computed
print(y.shape)
(200, 1)
In [21]:
G =  model(X, theta) - y
In [22]:
print(G.shape)
(200, 1)
In [23]:
def cost_function(X, y, theta):
    m = len(y)
    cost = 1/(2*m) * np.sum((model(X, theta) - y)**2)  # (halved) mean squared error
    return cost
In [24]:
initial_cost = cost_function(X, y, theta)
initial_cost
Out[24]:
10018.961471353592

4. Gradient and Gradient Descent

In [25]:
path="D:\Regression\gradients.png"
display(Image.open(path))
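In text form, the gradient implemented below is:

grad(theta) = 1/m * X.T . (X.theta - y)

which is a vector of dimension (n+1) x 1.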
In [26]:
def grad(X, y, theta):
    m = len(y)
    return 1/m * X.T.dot(model(X, theta) - y)
In [27]:
print(X.T.shape)  # The dimension of X.T must be (n+1) x m
(4, 200)
In [28]:
z = grad(X, y, theta)  # The dimension of grad(X, y, theta) must be (n+1) x 1
print(z.shape)
(4, 1)
In [29]:
path="D:\Regression\gradient_descent_2.png"
display(Image.open(path))
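In text form, the update rule implemented below is:

theta := theta - learning_rate * grad(X, y, theta)

repeated n_iterations times, while recording the cost at each step.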
In [30]:
def gradient_descent(X, y, theta, learning_rate, n_iterations):
    cost_history = np.zeros(n_iterations)
    for i in range(0, n_iterations):
        theta = theta - learning_rate * grad(X, y, theta)  # gradient descent update
        cost_history[i] = cost_function(X, y, theta)       # record the cost at each iteration
    return theta, cost_history

5. Machine Learning

In [31]:
n_it = 4000
In [32]:
theta_final, cost_history = gradient_descent(X, y, theta, learning_rate = 0.00001, n_iterations = n_it)
In [33]:
plt.plot(range(n_it), cost_history)
Out[33]:
[<matplotlib.lines.Line2D at 0x220c54b8f48>]
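The learning rate has to be very small here (0.00001) because the three features are on very different scales. As a side note, here is a minimal sketch (not part of the original notebook; the learning rate and iteration count are illustrative, not tuned) of standardizing the features first, which usually allows a much larger learning rate and faster convergence:

# Sketch (assumes the same dataset and the functions defined above are available):
# standardize each feature to zero mean and unit variance before gradient descent.
X_raw = np.array(dataset[['TV','Radio','Newspaper']])
X_scaled = (X_raw - X_raw.mean(axis=0)) / X_raw.std(axis=0)
X_scaled = np.c_[X_scaled, np.ones(X_scaled.shape[0])]   # re-append the bias column

theta0 = np.random.randn(4, 1)
theta_scaled, cost_hist_scaled = gradient_descent(X_scaled, y, theta0,
                                                  learning_rate=0.01, n_iterations=1000)
plt.plot(range(1000), cost_hist_scaled)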
In [34]:
# We can see that the cost keeps decreasing and has converged by about 4000 iterations
In [35]:
predictions = model(X, theta_final)
In [36]:
# Predict sales for one observation: TV=151.5, Radio=41.3, Newspaper=58.5
# (row 3 of the dataset, actual Sales = 18.5); the trailing 1 matches the bias column
my_pred = model(np.array([151.5,41.3,58.5,1]), theta_final)

my_pred
Out[36]:
array([18.40087424])
In [37]:
path="D:\Regression\determination_coef.png"
display(Image.open(path))
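In text form, the coefficient of determination computed below is:

R² = 1 - sum((y - pred)^2) / sum((y - mean(y))^2)

The closer it is to 1, the better the predictions explain the variance of y.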
In [38]:
# The coefficient of determination (R²) measures the proportion of the variance in y explained by the model

def determination_rate(y, pred):
    u = ((y - pred)**2).sum()
    v = ((y - y.mean())**2).sum()
    return 1 - u/v
In [39]:
determination_rate(y, predictions)
Out[39]:
0.8388215305394597
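As a sanity check, here is a minimal sketch (assuming scikit-learn is installed, as the commented-out import at the top suggests) comparing with the closed-form fit of sklearn's LinearRegression, whose score method returns this same R²:

# Sketch: compare the gradient descent result with scikit-learn's LinearRegression
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(dataset[['TV','Radio','Newspaper']], dataset['Sales'])
print(reg.coef_, reg.intercept_)   # compare with theta_final[:3] and theta_final[3]
print(reg.score(dataset[['TV','Radio','Newspaper']], dataset['Sales']))   # compare with determination_rate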
In [40]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')
ax.set_xlabel('TV')
ax.set_ylabel('Radio')
ax.set_zlabel('Sales')

ax.scatter(dataset["TV"], dataset["Radio"], dataset["Sales"], c = 'b', marker = '^')

ax.scatter(dataset["TV"], dataset["Radio"], predictions, c = 'r', marker = '^')

plt.show()
In [41]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')
ax.set_xlabel('TV')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')

ax.scatter(dataset["TV"], dataset["Newspaper"], dataset["Sales"], c = 'g', marker = '^')

ax.scatter(dataset["TV"], dataset["Newspaper"], predictions, c = 'r', marker = '^')

plt.show()
In [42]:
graph = plt.figure()
ax = graph.add_subplot(111, projection = '3d')
ax.set_xlabel('Radio')
ax.set_ylabel('Newspaper')
ax.set_zlabel('Sales')

ax.scatter(dataset["Radio"], dataset["Newspaper"], dataset["Sales"], c = 'y', marker = '^')

ax.scatter(dataset["Radio"], dataset["Newspaper"], predictions, c = 'r', marker = '^')

plt.show()
