{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Trenujemy klasyfikator dla zbioru danych Titanic" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from pycaret.classification import setup as cls_setup, compare_models as cls_compare_models, finalize_model as cls_finalize_model, predict_model as cls_predict_model, plot_model as cls_plot_model, save_model as cls_save_model\n", "from pycaret.regression import setup as reg_setup, compare_models as reg_compare_models, finalize_model as reg_finalize_model, predict_model as reg_predict_model, plot_model as reg_plot_model, save_model as reg_save_model\n", "from pycaret.datasets import get_data\n", "import pandas as pd\n", "from IPython.display import Markdown, display" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Wyświetlenie wszystkich dostępnych zbiorów danych" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DatasetData TypesDefault TaskTarget Variable 1Target Variable 2# Instances# AttributesMissing Values
0anomalyMultivariateAnomaly DetectionNaNNaN100010N
1franceMultivariateAssociation Rule MiningInvoiceNoDescription85578N
2germanyMultivariateAssociation Rule MiningInvoiceNoDescription94958N
3bankMultivariateClassification (Binary)depositNaN4521117N
4bloodMultivariateClassification (Binary)ClassNaN7485N
5cancerMultivariateClassification (Binary)ClassNaN68310N
6creditMultivariateClassification (Binary)defaultNaN2400024N
7diabetesMultivariateClassification (Binary)Class variableNaN7689N
8electrical_gridMultivariateClassification (Binary)stabfNaN1000014N
9employeeMultivariateClassification (Binary)leftNaN1499910N
10heartMultivariateClassification (Binary)DEATHNaN20016N
11heart_diseaseMultivariateClassification (Binary)DiseaseNaN27014N
12hepatitisMultivariateClassification (Binary)ClassNaN15432Y
13incomeMultivariateClassification (Binary)income >50KNaN3256114Y
14juiceMultivariateClassification (Binary)PurchaseNaN107015N
15nbaMultivariateClassification (Binary)TARGET_5YrsNaN134021N
16wineMultivariateClassification (Binary)typeNaN649813N
17telescopeMultivariateClassification (Binary)ClassNaN1902011N
18titanicMultivariateClassification (Binary)SurvivedNaN89111Y
19us_presidential_election_resultsMultivariateClassification (Binary)party_winnerNaN4977N
20glassMultivariateClassification (Multiclass)TypeNaN21410N
21irisMultivariateClassification (Multiclass)speciesNaN1505N
22pokerMultivariateClassification (Multiclass)CLASSNaN10000011N
23questionsMultivariateClassification (Multiclass)Next_QuestionNaN4994N
24satelliteMultivariateClassification (Multiclass)ClassNaN643537N
25CTGMultivariateClassification (Multiclass)NSPNaN212940Y
26asia_gdpMultivariateClusteringNaNNaN4011N
27electionsMultivariateClusteringNaNNaN319554Y
28facebookMultivariateClusteringNaNNaN705012N
29iplMultivariateClusteringNaNNaN15325N
30jewelleryMultivariateClusteringNaNNaN5054N
31miceMultivariateClusteringNaNNaN108082Y
32migrationMultivariateClusteringNaNNaN23312N
33perfumeMultivariateClusteringNaNNaN2029N
34pokemonMultivariateClusteringNaNNaN80013Y
35populationMultivariateClusteringNaNNaN25556Y
36public_healthMultivariateClusteringNaNNaN22421N
37seedsMultivariateClusteringNaNNaN2107N
38wholesaleMultivariateClusteringNaNNaN4408N
39tweetsTextNLPtweetNaN85942N
40amazonTextNLP / ClassificationreviewTextNaN200002N
41kivaTextNLP / ClassificationenNaN68187N
42spxTextNLP / RegressiontextNaN8744N
43wikipediaTextNLP / ClassificationTextNaN5003N
44automobileMultivariateRegressionpriceNaN20226Y
45bikeMultivariateRegressioncntNaN1737915N
46bostonMultivariateRegressionmedvNaN50614N
47concreteMultivariateRegressionstrengthNaN10309N
48diamondMultivariateRegressionPriceNaN60008N
49energyMultivariateRegressionHeating LoadCooling Load76810N
50forestMultivariateRegressionareaNaN51713N
51goldMultivariateRegressionGold_T+22NaN2558121N
52houseMultivariateRegressionSalePriceNaN146181Y
53insuranceMultivariateRegressionchargesNaN13387N
54parkinsonsMultivariateRegressionPPENaN587522N
55trafficMultivariateRegressiontraffic_volumeNaN482048N
\n", "
" ], "text/plain": [ " Dataset Data Types \\\n", "0 anomaly Multivariate \n", "1 france Multivariate \n", "2 germany Multivariate \n", "3 bank Multivariate \n", "4 blood Multivariate \n", "5 cancer Multivariate \n", "6 credit Multivariate \n", "7 diabetes Multivariate \n", "8 electrical_grid Multivariate \n", "9 employee Multivariate \n", "10 heart Multivariate \n", "11 heart_disease Multivariate \n", "12 hepatitis Multivariate \n", "13 income Multivariate \n", "14 juice Multivariate \n", "15 nba Multivariate \n", "16 wine Multivariate \n", "17 telescope Multivariate \n", "18 titanic Multivariate \n", "19 us_presidential_election_results Multivariate \n", "20 glass Multivariate \n", "21 iris Multivariate \n", "22 poker Multivariate \n", "23 questions Multivariate \n", "24 satellite Multivariate \n", "25 CTG Multivariate \n", "26 asia_gdp Multivariate \n", "27 elections Multivariate \n", "28 facebook Multivariate \n", "29 ipl Multivariate \n", "30 jewellery Multivariate \n", "31 mice Multivariate \n", "32 migration Multivariate \n", "33 perfume Multivariate \n", "34 pokemon Multivariate \n", "35 population Multivariate \n", "36 public_health Multivariate \n", "37 seeds Multivariate \n", "38 wholesale Multivariate \n", "39 tweets Text \n", "40 amazon Text \n", "41 kiva Text \n", "42 spx Text \n", "43 wikipedia Text \n", "44 automobile Multivariate \n", "45 bike Multivariate \n", "46 boston Multivariate \n", "47 concrete Multivariate \n", "48 diamond Multivariate \n", "49 energy Multivariate \n", "50 forest Multivariate \n", "51 gold Multivariate \n", "52 house Multivariate \n", "53 insurance Multivariate \n", "54 parkinsons Multivariate \n", "55 traffic Multivariate \n", "\n", " Default Task Target Variable 1 Target Variable 2 \\\n", "0 Anomaly Detection NaN NaN \n", "1 Association Rule Mining InvoiceNo Description \n", "2 Association Rule Mining InvoiceNo Description \n", "3 Classification (Binary) deposit NaN \n", "4 Classification (Binary) Class NaN \n", "5 
Classification (Binary) Class NaN \n", "6 Classification (Binary) default NaN \n", "7 Classification (Binary) Class variable NaN \n", "8 Classification (Binary) stabf NaN \n", "9 Classification (Binary) left NaN \n", "10 Classification (Binary) DEATH NaN \n", "11 Classification (Binary) Disease NaN \n", "12 Classification (Binary) Class NaN \n", "13 Classification (Binary) income >50K NaN \n", "14 Classification (Binary) Purchase NaN \n", "15 Classification (Binary) TARGET_5Yrs NaN \n", "16 Classification (Binary) type NaN \n", "17 Classification (Binary) Class NaN \n", "18 Classification (Binary) Survived NaN \n", "19 Classification (Binary) party_winner NaN \n", "20 Classification (Multiclass) Type NaN \n", "21 Classification (Multiclass) species NaN \n", "22 Classification (Multiclass) CLASS NaN \n", "23 Classification (Multiclass) Next_Question NaN \n", "24 Classification (Multiclass) Class NaN \n", "25 Classification (Multiclass) NSP NaN \n", "26 Clustering NaN NaN \n", "27 Clustering NaN NaN \n", "28 Clustering NaN NaN \n", "29 Clustering NaN NaN \n", "30 Clustering NaN NaN \n", "31 Clustering NaN NaN \n", "32 Clustering NaN NaN \n", "33 Clustering NaN NaN \n", "34 Clustering NaN NaN \n", "35 Clustering NaN NaN \n", "36 Clustering NaN NaN \n", "37 Clustering NaN NaN \n", "38 Clustering NaN NaN \n", "39 NLP tweet NaN \n", "40 NLP / Classification reviewText NaN \n", "41 NLP / Classification en NaN \n", "42 NLP / Regression text NaN \n", "43 NLP / Classification Text NaN \n", "44 Regression price NaN \n", "45 Regression cnt NaN \n", "46 Regression medv NaN \n", "47 Regression strength NaN \n", "48 Regression Price NaN \n", "49 Regression Heating Load Cooling Load \n", "50 Regression area NaN \n", "51 Regression Gold_T+22 NaN \n", "52 Regression SalePrice NaN \n", "53 Regression charges NaN \n", "54 Regression PPE NaN \n", "55 Regression traffic_volume NaN \n", "\n", " # Instances # Attributes Missing Values \n", "0 1000 10 N \n", "1 8557 8 N \n", "2 9495 8 N 
\n", "3 45211 17 N \n", "4 748 5 N \n", "5 683 10 N \n", "6 24000 24 N \n", "7 768 9 N \n", "8 10000 14 N \n", "9 14999 10 N \n", "10 200 16 N \n", "11 270 14 N \n", "12 154 32 Y \n", "13 32561 14 Y \n", "14 1070 15 N \n", "15 1340 21 N \n", "16 6498 13 N \n", "17 19020 11 N \n", "18 891 11 Y \n", "19 497 7 N \n", "20 214 10 N \n", "21 150 5 N \n", "22 100000 11 N \n", "23 499 4 N \n", "24 6435 37 N \n", "25 2129 40 Y \n", "26 40 11 N \n", "27 3195 54 Y \n", "28 7050 12 N \n", "29 153 25 N \n", "30 505 4 N \n", "31 1080 82 Y \n", "32 233 12 N \n", "33 20 29 N \n", "34 800 13 Y \n", "35 255 56 Y \n", "36 224 21 N \n", "37 210 7 N \n", "38 440 8 N \n", "39 8594 2 N \n", "40 20000 2 N \n", "41 6818 7 N \n", "42 874 4 N \n", "43 500 3 N \n", "44 202 26 Y \n", "45 17379 15 N \n", "46 506 14 N \n", "47 1030 9 N \n", "48 6000 8 N \n", "49 768 10 N \n", "50 517 13 N \n", "51 2558 121 N \n", "52 1461 81 Y \n", "53 1338 7 N \n", "54 5875 22 N \n", "55 48204 8 N " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset_df = get_data('index', verbose=False)\n", "dataset_df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = get_data('titanic')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Liczba rekordów" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "891" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Odsetek brakujących wartości w kolumnie `Embarked` (%)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.22446689113355783" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Embarked'].isnull().sum() / len(df['Embarked']) *100" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "## Mamy do czynienia z problemem: **Klasyfikacji**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "target_column = 'Survived'\n", "cls_setup(data=df, target=target_column, session_id=123, verbose=False)\n", "#reg_setup(data=df, target='petal_width', session_id=123, verbose=False)\n", "\n", "# Sprawdzenie czy to model: REGRESJI czy KLASYFIKACJI\n", "if 
target_column:\n", " if (pd.api.types.is_numeric_dtype(df[target_column])) and (df[target_column].nunique() > 10):\n", " display(Markdown('## Mamy do czynienia z problemem: **Regresji**'))\n", " else:\n", " display(Markdown('## Mamy do czynienia z problemem: **Klasyfikacji**'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compare Models" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 ModelAccuracyAUCRecallPrec.F1KappaMCCTT (Sec)
lrLogistic Regression0.80560.86920.67390.78830.72280.57580.58251.7720
ridgeRidge Classifier0.75280.86470.45220.82570.57930.42730.46790.0700
etExtra Trees Classifier0.74000.00000.47740.76540.58150.40880.43560.1520
ldaLinear Discriminant Analysis0.62600.53820.03480.08000.04850.03350.03890.0650
dtDecision Tree Classifier0.61640.00000.00000.00000.00000.00000.00000.0890
rfRandom Forest Classifier0.61640.00000.00000.00000.00000.00000.00000.1780
qdaQuadratic Discriminant Analysis0.61640.51660.00000.00000.00000.00000.00000.0700
adaAda Boost Classifier0.61640.50000.00000.00000.00000.00000.00000.0690
gbcGradient Boosting Classifier0.61640.50000.00000.00000.00000.00000.00000.1240
lightgbmLight Gradient Boosting Machine0.61640.00000.00000.00000.00000.00000.00000.0930
dummyDummy Classifier0.61640.00000.00000.00000.00000.00000.00000.0750
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d7df699944cc45f7a7031b2299d31331", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Processing: 0%| | 0/45 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cls_plot_model(best_model, plot='feature')\n", "#reg_plot_model(best_model, plot='feature')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Finalize Model and Save" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Transformation Pipeline and Model Successfully Saved\n" ] }, { "data": { "text/plain": [ "(Pipeline(memory=Memory(location=None),\n", " steps=[('label_encoding',\n", " TransformerWrapperWithInverse(exclude=None, include=None,\n", " transformer=LabelEncoder())),\n", " ('numerical_imputer',\n", " TransformerWrapper(exclude=None,\n", " include=['sepal_length', 'sepal_width',\n", " 'petal_length', 'petal_width'],\n", " transformer=SimpleImputer(add_indicator=False,\n", " copy=True,\n", " fill_value=None,\n", " keep_empt...\n", " fill_value=None,\n", " keep_empty_features=False,\n", " missing_values=nan,\n", " strategy='most_frequent'))),\n", " ('actual_estimator',\n", " LogisticRegression(C=1.0, class_weight=None, dual=False,\n", " fit_intercept=True, intercept_scaling=1,\n", " l1_ratio=None, max_iter=1000,\n", " multi_class='auto', n_jobs=None,\n", " penalty='l2', random_state=123,\n", " solver='lbfgs', tol=0.0001, verbose=0,\n", " warm_start=False))],\n", " verbose=False),\n", " 'iris_classification_pipeline.pkl')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_model = cls_finalize_model(best_model)\n", "#final_model = reg_finalize_model(best_model)\n", "cls_save_model(final_model, 'iris_classification_pipeline')\n", "#reg_save_model(final_model, 'iris_regression_pipeline')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 4 }