From fa3067ebb15c6f234cd8ee26e9f9b682edcd25b2 Mon Sep 17 00:00:00 2001 From: Orrm23 Date: Tue, 25 Feb 2025 06:20:55 -0800 Subject: [PATCH] regression --- 18_RegressionModelSelection.ipynb | 922 ++++++++++++++++++++++++++++++ 1 file changed, 922 insertions(+) create mode 100644 18_RegressionModelSelection.ipynb diff --git a/18_RegressionModelSelection.ipynb b/18_RegressionModelSelection.ipynb new file mode 100644 index 0000000..e18bc03 --- /dev/null +++ b/18_RegressionModelSelection.ipynb @@ -0,0 +1,922 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r3cas2_1T98w" + }, + "source": [ + "# Regression Model Selection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IODliia6U1xO" + }, + "source": [ + "## Importing the basic libraries" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "y98nA5UdU6Hf" + }, + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2hRC6YEod9_8" + }, + "source": [ + "### Load Dataset from Local Directory" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tZBTr4JHeAzb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 73 + }, + "outputId": "c5931337-dd18-4b92-f759-1b45c2bf1a32" + }, + "source": [ + "from google.colab import files\n", + "uploaded = files.upload()" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving dataset.csv to dataset.csv\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jpjZ43YlU8eI" + }, + "source": [ + "## Importing the dataset" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pLVaXoYVU_Uy" + }, + "source": [ + "dataset = pd.read_csv('dataset.csv')\n", + "X = dataset.iloc[:, :-1].values\n", + "y = dataset.iloc[:, -1].values\n", + "ysvm = y.reshape(len(y),1)" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tPJXMyyUJbWn" + }, + "source": [ + "## Splitting the dataset into the Training set and Test set" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rFOzpjaiJd5B" + }, + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "X_trainsvm, X_testsvm, y_trainsvm, y_testsvm = train_test_split(X, ysvm, test_size = 0.2, random_state = 0)" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5bU75GVthaOj" + }, + "source": [ + "### Importing Machine Learning Algorithms" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YF8HkKVYhag7" + }, + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.svm import SVR" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g16qFkFQVC35" + }, + "source": [ + "## Initializing different Regression algorithms" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "SLDKyv1SVUqS" + }, + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "modelLR = LinearRegression()\n", + "\n", + "poly_reg = PolynomialFeatures(degree = 4)\n", + "X_poly = poly_reg.fit_transform(X_train)\n", + "modelPLR = LinearRegression()\n", + "\n", + "modelRFR = RandomForestRegressor(n_estimators = 10, random_state = 0)\n", + "\n", + "modelDTR = DecisionTreeRegressor(random_state = 0)\n", + "\n", + "modelSVR = SVR(kernel = 'rbf')\n", + "\n", + "sc_X = StandardScaler()\n", + "sc_y = StandardScaler()\n", + "X_trainsvm = sc_X.fit_transform(X_trainsvm)\n", + "y_trainsvm = sc_y.fit_transform(y_trainsvm)" + ], + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ePPz0Lu6fXfN" + }, + "source": [ + "### Training Regression algorithm" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oHhA2uoyfEK2", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 137 + }, + "outputId": "62527e59-2484-46c1-8dde-85d85e397f94" + }, + "source": [ + "modelLR.fit(X_train, y_train)\n", + "modelPLR.fit(X_poly, y_train)\n", + "modelRFR.fit(X_train, y_train)\n", + "modelDTR.fit(X_train, y_train)\n", + "modelSVR.fit(X_trainsvm, y_trainsvm)" + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py:1408: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVR()" + ], + "text/html": [ + "
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Nao9cdO6IgNb" + }, + "source": [ + "## Predicting the Test set for Validation" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "EebHA3EOIkQK" + }, + "source": [ + "modelLRy_pred = modelLR.predict(X_test)\n", + "modelPLRy_pred = modelPLR.predict(poly_reg.transform(X_test))\n", + "modelRFRy_pred = modelRFR.predict(X_test)\n", + "modelDTRy_pred = modelDTR.predict(X_test)\n", + "modelSVRy_pred = sc_y.inverse_transform(modelSVR.predict(sc_X.transform(X_test)).reshape(-1, 1))" + ], + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V4nELFnnIod1" + }, + "source": [ + "## Evaluating the Model Performance" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_G2QS1UoIsTZ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8c51ca4f-7b43-4d4f-a32b-b2b7bc1f8fe4" + }, + "source": [ + "from sklearn.metrics import r2_score\n", + "print(\"Linear Regression Accuracy: {}\".format(r2_score(y_test, modelLRy_pred)))\n", + "print(\"Polynomial Regression Accuracy: {}\".format(r2_score(y_test, modelPLRy_pred)))\n", + "print(\"Random Forest Regression Accuracy: {}\".format(r2_score(y_test, modelRFRy_pred)))\n", + "print(\"Decision Treee Regression Accuracy: {}\".format(r2_score(y_test, modelDTRy_pred)))\n", + "print(\"Support Vector Regression Accuracy: {}\".format(r2_score(y_test, modelSVRy_pred)))" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Linear Regression Accuracy: 0.9325315554761303\n", + "Polynomial Regression Accuracy: 0.9455261540927579\n", + "Random Forest Regression Accuracy: 0.9615908334363876\n", + "Decision Treee Regression Accuracy: 0.922905874177941\n", + "Support Vector Regression Accuracy: 0.9480784049986258\n" + ] + } + ] + } + ] +} \ No newline at end of file