{ "cells": [ { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", "0 842302 M 17.99 10.38 122.80 1001.0 \n", "1 842517 M 20.57 17.77 132.90 1326.0 \n", "2 84300903 M 19.69 21.25 130.00 1203.0 \n", "3 84348301 M 11.42 20.38 77.58 386.1 \n", "4 84358402 M 20.29 14.34 135.10 1297.0 \n", "\n", " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", "0 0.11840 0.27760 0.3001 0.14710 \n", "1 0.08474 0.07864 0.0869 0.07017 \n", "2 0.10960 0.15990 0.1974 0.12790 \n", "3 0.14250 0.28390 0.2414 0.10520 \n", "4 0.10030 0.13280 0.1980 0.10430 \n", "\n", " ... radius_worst texture_worst perimeter_worst area_worst \\\n", "0 ... 25.38 17.33 184.60 2019.0 \n", "1 ... 24.99 23.41 158.80 1956.0 \n", "2 ... 23.57 25.53 152.50 1709.0 \n", "3 ... 14.91 26.50 98.87 567.7 \n", "4 ... 22.54 16.67 152.20 1575.0 \n", "\n", " smoothness_worst compactness_worst concavity_worst concave_points_worst \\\n", "0 0.1622 0.6656 0.7119 0.2654 \n", "1 0.1238 0.1866 0.2416 0.1860 \n", "2 0.1444 0.4245 0.4504 0.2430 \n", "3 0.2098 0.8663 0.6869 0.2575 \n", "4 0.1374 0.2050 0.4000 0.1625 \n", "\n", " symmetry_worst fractal_dimension_worst \n", "0 0.4601 0.11890 \n", "1 0.2750 0.08902 \n", "2 0.3613 0.08758 \n", "3 0.6638 0.17300 \n", "4 0.2364 0.07678 \n", "\n", "[5 rows x 32 columns]\n" ] } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.svm import SVC\n", "from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\n", "\n", "data = pd.read_csv(\"bc_data.csv\")\n", "\n", "print(data.head())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:09:04.693692Z", "start_time": "2024-03-04T02:09:02.773273Z" } }, "id": "4d508bb97c474b49", "execution_count": 1 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id radius_mean texture_mean perimeter_mean area_mean \\\n", "count 5.690000e+02 569.000000 569.000000 569.000000 569.000000 \n", "mean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 \n", "std 1.250206e+08 3.524049 4.301036 24.298981 351.914129 \n", "min 8.670000e+03 6.981000 9.710000 43.790000 143.500000 \n", "25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 \n", "50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 \n", "75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 \n", "max 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 \n", "\n", " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", "count 569.000000 569.000000 569.000000 569.000000 \n", "mean 0.096360 0.104341 0.088799 0.048919 \n", "std 0.014064 0.052813 0.079720 0.038803 \n", "min 0.052630 0.019380 0.000000 0.000000 \n", "25% 0.086370 0.064920 0.029560 0.020310 \n", "50% 0.095870 0.092630 0.061540 0.033500 \n", "75% 0.105300 0.130400 0.130700 0.074000 \n", "max 0.163400 0.345400 0.426800 0.201200 \n", "\n", " symmetry_mean ... radius_worst texture_worst perimeter_worst \\\n", "count 569.000000 ... 569.000000 569.000000 569.000000 \n", "mean 0.181162 ... 16.269190 25.677223 107.261213 \n", "std 0.027414 ... 4.833242 6.146258 33.602542 \n", "min 0.106000 ... 7.930000 12.020000 50.410000 \n", "25% 0.161900 ... 13.010000 21.080000 84.110000 \n", "50% 0.179200 ... 14.970000 25.410000 97.660000 \n", "75% 0.195700 ... 18.790000 29.720000 125.400000 \n", "max 0.304000 ... 36.040000 49.540000 251.200000 \n", "\n", " area_worst smoothness_worst compactness_worst concavity_worst \\\n", "count 569.000000 569.000000 569.000000 569.000000 \n", "mean 880.583128 0.132369 0.254265 0.272188 \n", "std 569.356993 0.022832 0.157336 0.208624 \n", "min 185.200000 0.071170 0.027290 0.000000 \n", "25% 515.300000 0.116600 0.147200 0.114500 \n", "50% 686.500000 0.131300 0.211900 0.226700 \n", "75% 1084.000000 0.146000 0.339100 0.382900 \n", "max 4254.000000 0.222600 1.058000 1.252000 \n", "\n", " concave_points_worst symmetry_worst fractal_dimension_worst \n", "count 569.000000 569.000000 569.000000 \n", "mean 0.114606 0.290076 0.083946 \n", "std 0.065732 0.061867 0.018061 \n", "min 0.000000 0.156500 0.055040 \n", "25% 0.064930 0.250400 0.071460 \n", "50% 0.099930 0.282200 0.080040 \n", "75% 0.161400 0.317900 0.092080 \n", "max 0.291000 0.663800 0.207500 \n", "\n", "[8 rows x 31 columns]\n" ] } ], "source": [ "print(data.describe())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:09:32.977030Z", "start_time": "2024-03-04T02:09:32.949925Z" } }, "id": "9aaf39ec5b291ec", "execution_count": 2 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 569 entries, 0 to 568\n", "Data columns (total 32 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 id 569 non-null int64 \n", " 1 diagnosis 569 non-null object \n", " 2 radius_mean 569 non-null float64\n", " 3 texture_mean 569 non-null float64\n", " 4 perimeter_mean 569 non-null float64\n", " 5 area_mean 569 non-null float64\n", " 6 smoothness_mean 569 non-null float64\n", " 7 compactness_mean 569 non-null float64\n", " 8 concavity_mean 569 non-null float64\n", " 9 concave points_mean 569 non-null float64\n", " 10 symmetry_mean 569 non-null float64\n", " 11 fractal_dimension_mean 569 non-null float64\n", " 12 radius_se 569 non-null float64\n", " 13 texture_se 569 non-null float64\n", " 14 perimeter_se 569 non-null float64\n", " 15 area_se 569 non-null float64\n", " 16 smoothness_se 569 non-null float64\n", " 17 compactness_se 569 non-null float64\n", " 18 concavity_se 569 non-null float64\n", " 19 concave points_se 569 non-null float64\n", " 20 symmetry_se 569 non-null float64\n", " 21 fractal_dimension_se 569 non-null float64\n", " 22 radius_worst 569 non-null float64\n", " 23 texture_worst 569 non-null float64\n", " 24 perimeter_worst 569 non-null float64\n", " 25 area_worst 569 non-null float64\n", " 26 smoothness_worst 569 non-null float64\n", " 27 compactness_worst 569 non-null float64\n", " 28 concavity_worst 569 non-null float64\n", " 29 concave_points_worst 569 non-null float64\n", " 30 symmetry_worst 569 non-null float64\n", " 31 fractal_dimension_worst 569 non-null float64\n", "dtypes: float64(30), int64(1), object(1)\n", "memory usage: 142.4+ KB\n", "None\n" ] } ], "source": [ "print(data.info())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:09:38.704303Z", "start_time": "2024-03-04T02:09:38.698070Z" } }, "id": "be3970d0e109f653", "execution_count": 3 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(569, 32)\n" ] } ], "source": [ "print(data.shape)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:10:10.552204Z", "start_time": "2024-03-04T02:10:10.548362Z" } }, "id": "97d96f07ececf49d", "execution_count": 4 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "id 0\n", "diagnosis 0\n", "radius_mean 0\n", "texture_mean 0\n", "perimeter_mean 0\n", "area_mean 0\n", "smoothness_mean 0\n", "compactness_mean 0\n", "concavity_mean 0\n", "concave points_mean 0\n", "symmetry_mean 0\n", "fractal_dimension_mean 0\n", "radius_se 0\n", "texture_se 0\n", "perimeter_se 0\n", "area_se 0\n", "smoothness_se 0\n", "compactness_se 0\n", "concavity_se 0\n", "concave points_se 0\n", "symmetry_se 0\n", "fractal_dimension_se 0\n", "radius_worst 0\n", "texture_worst 0\n", "perimeter_worst 0\n", "area_worst 0\n", "smoothness_worst 0\n", "compactness_worst 0\n", "concavity_worst 0\n", "concave_points_worst 0\n", "symmetry_worst 0\n", "fractal_dimension_worst 0\n", "dtype: int64\n" ] } ], "source": [ "print(data.isnull().sum())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:10:16.526676Z", "start_time": "2024-03-04T02:10:16.522865Z" } }, "id": "be536489e70e319b", "execution_count": 5 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "diagnosis\n", "B 357\n", "M 212\n", "Name: count, dtype: int64\n" ] } ], "source": [ "print(data[\"diagnosis\"].value_counts())" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:10:22.975008Z", "start_time": "2024-03-04T02:10:22.971472Z" } }, "id": "b177c73a01ea9fbd", "execution_count": 6 }, { "cell_type": "code", "outputs": [], "source": [ "data[\"diagnosis\"] = data[\"diagnosis\"].map({\"B\": 0, \"M\": 1})\n", "X_train, X_test, y_train, y_test = train_test_split(data.drop(\"diagnosis\", axis=1), data[\"diagnosis\"], test_size=0.2, random_state=2024)\n", "scaler = StandardScaler()\n", "X_train = scaler.fit_transform(X_train)\n", "X_test = scaler.transform(X_test)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:10:48.056685Z", "start_time": "2024-03-04T02:10:48.046335Z" } }, "id": "60e964547245ddee", "execution_count": 7 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "SVC()", "text/html": "
SVC()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = SVC()\n", "clf.fit(X_train, y_train)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:56:26.908104Z", "start_time": "2024-03-04T02:56:26.895390Z" } }, "id": "ebcf608a6ffb93e8", "execution_count": 8 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[69 2]\n", " [ 1 42]]\n" ] } ], "source": [ "y_pred = clf.predict(X_test)\n", "print(confusion_matrix(y_test, y_pred))" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:17.677193Z", "start_time": "2024-03-04T02:57:17.671854Z" } }, "id": "b454be0583a3c34f", "execution_count": 10 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9736842105263158\n" ] } ], "source": [ "print(accuracy_score(y_test, y_pred))\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:21.314255Z", "start_time": "2024-03-04T02:57:21.310040Z" } }, "id": "4575054b50215bc3", "execution_count": 11 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9545454545454546\n" ] } ], "source": [ "print(precision_score(y_test, y_pred))\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:25.007458Z", "start_time": "2024-03-04T02:57:25.002935Z" } }, "id": "c96ba7bcf7165d99", "execution_count": 12 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9767441860465116\n" ] } ], "source": [ "print(recall_score(y_test, y_pred))\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:27.718456Z", "start_time": "2024-03-04T02:57:27.714094Z" } }, "id": "2c02783417d4017a", "execution_count": 13 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9655172413793104\n" ] } ], "source": [ "print(f1_score(y_test, y_pred))\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:31.350717Z", "start_time": "2024-03-04T02:57:31.345851Z" } }, "id": "cc456866aa980c17", "execution_count": 14 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 13 18 33 38 48 56 59 77 83 87 114 125 148 156 159 166 169 171\n", " 177 178 183 188 200 204 206 214 217 218 235 239 255 265 266 279 293 309\n", " 317 322 328 339 349 365 367 369 372 378 383 384 403 413 414 424 431 443\n", " 454 9 22 23 34 45 53 68 72 79 95 105 127 133 136 140 144 160\n", " 176 185 187 190 195 207 220 222 228 247 248 253 254 259 262 263 264 274\n", " 285 286 291 303 311 316 318 320 321 323 333 335 344 348 354 358 376 377\n", " 379 388 389 397 400 445]\n" ] } ], "source": [ "print(clf.support_)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:34.435226Z", "start_time": "2024-03-04T02:57:34.431502Z" } }, "id": "c525da4d8cccd3cd", "execution_count": 15 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[-0.17012437 -0.22924116 2.70478005 ... -0.55376737 -1.10363736\n", " -0.37583597]\n", " [-0.1734972 -1.23645223 -0.91151876 ... 0.62729503 0.3485014\n", " 2.31986969]\n", " [-0.17179579 -0.81441175 0.17478166 ... -0.3232618 0.10488602\n", " -0.74111368]\n", " ...\n", " [-0.17433342 1.64772112 1.11765281 ... 2.01983998 4.0616457\n", " 0.8796005 ]\n", " [-0.23937866 1.0321879 0.18653816 ... 0.0536319 3.17316606\n", " -1.23363529]\n", " [-0.23946255 -0.20163877 0.37699343 ... 1.6482965 -0.13076808\n", " 1.28875542]]\n" ] } ], "source": [ "print(clf.support_vectors_)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-04T02:57:37.595846Z", "start_time": "2024-03-04T02:57:37.592041Z" } }, "id": "55c1f301527e2dc8", "execution_count": 16 }, { "cell_type": "code", "outputs": [], "source": [], "metadata": { "collapsed": false }, "id": "1ab9229be9a4404b" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }