{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2024-03-03T12:37:14.219767Z", "start_time": "2024-03-03T12:37:14.199729Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " id diagnosis radius_mean texture_mean perimeter_mean area_mean \\\n", "0 842302 M 17.99 10.38 122.80 1001.0 \n", "1 842517 M 20.57 17.77 132.90 1326.0 \n", "2 84300903 M 19.69 21.25 130.00 1203.0 \n", "3 84348301 M 11.42 20.38 77.58 386.1 \n", "4 84358402 M 20.29 14.34 135.10 1297.0 \n", "\n", " smoothness_mean compactness_mean concavity_mean concave points_mean \\\n", "0 0.11840 0.27760 0.3001 0.14710 \n", "1 0.08474 0.07864 0.0869 0.07017 \n", "2 0.10960 0.15990 0.1974 0.12790 \n", "3 0.14250 0.28390 0.2414 0.10520 \n", "4 0.10030 0.13280 0.1980 0.10430 \n", "\n", " ... radius_worst texture_worst perimeter_worst area_worst \\\n", "0 ... 25.38 17.33 184.60 2019.0 \n", "1 ... 24.99 23.41 158.80 1956.0 \n", "2 ... 23.57 25.53 152.50 1709.0 \n", "3 ... 14.91 26.50 98.87 567.7 \n", "4 ... 22.54 16.67 152.20 1575.0 \n", "\n", " smoothness_worst compactness_worst concavity_worst concave_points_worst \\\n", "0 0.1622 0.6656 0.7119 0.2654 \n", "1 0.1238 0.1866 0.2416 0.1860 \n", "2 0.1444 0.4245 0.4504 0.2430 \n", "3 0.2098 0.8663 0.6869 0.2575 \n", "4 0.1374 0.2050 0.4000 0.1625 \n", "\n", " symmetry_worst fractal_dimension_worst \n", "0 0.4601 0.11890 \n", "1 0.2750 0.08902 \n", "2 0.3613 0.08758 \n", "3 0.6638 0.17300 \n", "4 0.2364 0.07678 \n", "\n", "[5 rows x 32 columns]\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n", "\n", "data = pd.read_csv(\"bc_data.csv\")\n", "print(data.head())" ] }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": " id radius_mean texture_mean perimeter_mean area_mean \\\ncount 5.690000e+02 569.000000 569.000000 569.000000 569.000000 \nmean 3.037183e+07 14.127292 19.289649 91.969033 654.889104 \nstd 1.250206e+08 3.524049 4.301036 24.298981 351.914129 \nmin 8.670000e+03 6.981000 9.710000 43.790000 143.500000 \n25% 8.692180e+05 11.700000 16.170000 75.170000 420.300000 \n50% 9.060240e+05 13.370000 18.840000 86.240000 551.100000 \n75% 8.813129e+06 15.780000 21.800000 104.100000 782.700000 \nmax 9.113205e+08 28.110000 39.280000 188.500000 2501.000000 \n\n smoothness_mean compactness_mean concavity_mean concave points_mean \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 0.096360 0.104341 0.088799 0.048919 \nstd 0.014064 0.052813 0.079720 0.038803 \nmin 0.052630 0.019380 0.000000 0.000000 \n25% 0.086370 0.064920 0.029560 0.020310 \n50% 0.095870 0.092630 0.061540 0.033500 \n75% 0.105300 0.130400 0.130700 0.074000 \nmax 0.163400 0.345400 0.426800 0.201200 \n\n symmetry_mean ... radius_worst texture_worst perimeter_worst \\\ncount 569.000000 ... 569.000000 569.000000 569.000000 \nmean 0.181162 ... 16.269190 25.677223 107.261213 \nstd 0.027414 ... 4.833242 6.146258 33.602542 \nmin 0.106000 ... 7.930000 12.020000 50.410000 \n25% 0.161900 ... 13.010000 21.080000 84.110000 \n50% 0.179200 ... 14.970000 25.410000 97.660000 \n75% 0.195700 ... 18.790000 29.720000 125.400000 \nmax 0.304000 ... 36.040000 49.540000 251.200000 \n\n area_worst smoothness_worst compactness_worst concavity_worst \\\ncount 569.000000 569.000000 569.000000 569.000000 \nmean 880.583128 0.132369 0.254265 0.272188 \nstd 569.356993 0.022832 0.157336 0.208624 \nmin 185.200000 0.071170 0.027290 0.000000 \n25% 515.300000 0.116600 0.147200 0.114500 \n50% 686.500000 0.131300 0.211900 0.226700 \n75% 1084.000000 0.146000 0.339100 0.382900 \nmax 4254.000000 0.222600 1.058000 1.252000 \n\n concave_points_worst symmetry_worst fractal_dimension_worst \ncount 569.000000 569.000000 569.000000 \nmean 0.114606 0.290076 0.083946 \nstd 0.065732 0.061867 0.018061 \nmin 0.000000 0.156500 0.055040 \n25% 0.064930 0.250400 0.071460 \n50% 0.099930 0.282200 0.080040 \n75% 0.161400 0.317900 0.092080 \nmax 0.291000 0.663800 0.207500 \n\n[8 rows x 31 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idradius_meantexture_meanperimeter_meanarea_meansmoothness_meancompactness_meanconcavity_meanconcave points_meansymmetry_mean...radius_worsttexture_worstperimeter_worstarea_worstsmoothness_worstcompactness_worstconcavity_worstconcave_points_worstsymmetry_worstfractal_dimension_worst
count5.690000e+02569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000...569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000569.000000
mean3.037183e+0714.12729219.28964991.969033654.8891040.0963600.1043410.0887990.0489190.181162...16.26919025.677223107.261213880.5831280.1323690.2542650.2721880.1146060.2900760.083946
std1.250206e+083.5240494.30103624.298981351.9141290.0140640.0528130.0797200.0388030.027414...4.8332426.14625833.602542569.3569930.0228320.1573360.2086240.0657320.0618670.018061
min8.670000e+036.9810009.71000043.790000143.5000000.0526300.0193800.0000000.0000000.106000...7.93000012.02000050.410000185.2000000.0711700.0272900.0000000.0000000.1565000.055040
25%8.692180e+0511.70000016.17000075.170000420.3000000.0863700.0649200.0295600.0203100.161900...13.01000021.08000084.110000515.3000000.1166000.1472000.1145000.0649300.2504000.071460
50%9.060240e+0513.37000018.84000086.240000551.1000000.0958700.0926300.0615400.0335000.179200...14.97000025.41000097.660000686.5000000.1313000.2119000.2267000.0999300.2822000.080040
75%8.813129e+0615.78000021.800000104.100000782.7000000.1053000.1304000.1307000.0740000.195700...18.79000029.720000125.4000001084.0000000.1460000.3391000.3829000.1614000.3179000.092080
max9.113205e+0828.11000039.280000188.5000002501.0000000.1634000.3454000.4268000.2012000.304000...36.04000049.540000251.2000004254.0000000.2226001.0580001.2520000.2910000.6638000.207500
\n

8 rows × 31 columns

\n
" }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.describe()\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:37:34.624072Z", "start_time": "2024-03-03T12:37:34.592026Z" } }, "id": "ce5939bbd4b11119", "execution_count": 4 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 569 entries, 0 to 568\n", "Data columns (total 32 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 id 569 non-null int64 \n", " 1 diagnosis 569 non-null object \n", " 2 radius_mean 569 non-null float64\n", " 3 texture_mean 569 non-null float64\n", " 4 perimeter_mean 569 non-null float64\n", " 5 area_mean 569 non-null float64\n", " 6 smoothness_mean 569 non-null float64\n", " 7 compactness_mean 569 non-null float64\n", " 8 concavity_mean 569 non-null float64\n", " 9 concave points_mean 569 non-null float64\n", " 10 symmetry_mean 569 non-null float64\n", " 11 fractal_dimension_mean 569 non-null float64\n", " 12 radius_se 569 non-null float64\n", " 13 texture_se 569 non-null float64\n", " 14 perimeter_se 569 non-null float64\n", " 15 area_se 569 non-null float64\n", " 16 smoothness_se 569 non-null float64\n", " 17 compactness_se 569 non-null float64\n", " 18 concavity_se 569 non-null float64\n", " 19 concave points_se 569 non-null float64\n", " 20 symmetry_se 569 non-null float64\n", " 21 fractal_dimension_se 569 non-null float64\n", " 22 radius_worst 569 non-null float64\n", " 23 texture_worst 569 non-null float64\n", " 24 perimeter_worst 569 non-null float64\n", " 25 area_worst 569 non-null float64\n", " 26 smoothness_worst 569 non-null float64\n", " 27 compactness_worst 569 non-null float64\n", " 28 concavity_worst 569 non-null float64\n", " 29 concave_points_worst 569 non-null float64\n", " 30 symmetry_worst 569 non-null float64\n", " 31 fractal_dimension_worst 569 non-null float64\n", "dtypes: float64(30), int64(1), object(1)\n", "memory usage: 142.4+ KB\n" ] } ], "source": [ "data.info()\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:37:39.116253Z", "start_time": "2024-03-03T12:37:39.110965Z" } }, "id": "968d59778944a1be", "execution_count": 5 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "(569, 32)" }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:37:40.558748Z", "start_time": "2024-03-03T12:37:40.555663Z" } }, "id": "8a804c27a82f52c0", "execution_count": 6 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "id 0\ndiagnosis 0\nradius_mean 0\ntexture_mean 0\nperimeter_mean 0\narea_mean 0\nsmoothness_mean 0\ncompactness_mean 0\nconcavity_mean 0\nconcave points_mean 0\nsymmetry_mean 0\nfractal_dimension_mean 0\nradius_se 0\ntexture_se 0\nperimeter_se 0\narea_se 0\nsmoothness_se 0\ncompactness_se 0\nconcavity_se 0\nconcave points_se 0\nsymmetry_se 0\nfractal_dimension_se 0\nradius_worst 0\ntexture_worst 0\nperimeter_worst 0\narea_worst 0\nsmoothness_worst 0\ncompactness_worst 0\nconcavity_worst 0\nconcave_points_worst 0\nsymmetry_worst 0\nfractal_dimension_worst 0\ndtype: int64" }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.isnull().sum()\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:38:05.672870Z", "start_time": "2024-03-03T12:38:05.666989Z" } }, "id": "27fb5c5ebe568c89", "execution_count": 7 }, { "cell_type": "code", "outputs": [], "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "le = LabelEncoder()\n", "data[\"diagnosis\"] = le.fit_transform(data[\"diagnosis\"])" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:38:14.612210Z", "start_time": "2024-03-03T12:38:14.608859Z" } }, "id": "921f15387d3f20d", "execution_count": 8 }, { "cell_type": "code", "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(data.drop(\"diagnosis\", axis=1), \n", " data[\"diagnosis\"], \n", " test_size=0.25, \n", " random_state=0)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:38:22.649922Z", "start_time": "2024-03-03T12:38:22.645136Z" } }, "id": "65fb42d7ccf169e3", "execution_count": 9 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "训练集数据量: (426, 31)\n", "测试集数据量: (143, 31)\n" ] } ], "source": [ "print(\"训练集数据量:\", X_train.shape)\n", "print(\"测试集数据量:\", X_test.shape)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:38:40.264544Z", "start_time": "2024-03-03T12:38:40.261072Z" } }, "id": "6d82d79ea7a3609f", "execution_count": 10 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "GaussianNB()", "text/html": "
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = GaussianNB()\n", "model.fit(X_train, y_train)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:38:47.313263Z", "start_time": "2024-03-03T12:38:47.307431Z" } }, "id": "15f256db08ba4d28", "execution_count": 11 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "准确率: 0.6363636363636364\n", "精确率: 0.6666666666666666\n", "召回率: 0.03773584905660377\n", "F1 值: 0.07142857142857142\n" ] } ], "source": [ "y_pred = model.predict(X_test)\n", "print(\"准确率:\", accuracy_score(y_test, y_pred))\n", "print(\"精确率:\", precision_score(y_test, y_pred))\n", "print(\"召回率:\", recall_score(y_test, y_pred))\n", "print(\"F1 值:\", f1_score(y_test, y_pred))\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:40:07.656413Z", "start_time": "2024-03-03T12:40:07.647268Z" } }, "id": "85299c4605550166", "execution_count": 13 }, { "cell_type": "code", "outputs": [ { "data": { "text/plain": "GridSearchCV(cv=5, estimator=GaussianNB(),\n param_grid={'var_smoothing': [1e-07, 1e-08, 1e-09, 1e-10, 1e-11,\n 1e-12]},\n scoring='accuracy')", "text/html": "
GridSearchCV(cv=5, estimator=GaussianNB(),\n             param_grid={'var_smoothing': [1e-07, 1e-08, 1e-09, 1e-10, 1e-11,\n                                           1e-12]},\n             scoring='accuracy')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "param_grid = {\"var_smoothing\": [1e-7, 1e-8, 1e-9, 1e-10, 1e-11, 1e-12]}\n", "\n", "grid_search = GridSearchCV(GaussianNB(), param_grid=param_grid, scoring=\"accuracy\", cv=5)\n", "grid_search.fit(X_train, y_train)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:40:16.442517Z", "start_time": "2024-03-03T12:40:16.378989Z" } }, "id": "5076117fd493d87b", "execution_count": 14 }, { "cell_type": "code", "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "最优参数: {'var_smoothing': 1e-12}\n" ] } ], "source": [ "print(\"最优参数:\", grid_search.best_params_)\n" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-03-03T12:40:59.139337Z", "start_time": "2024-03-03T12:40:59.136232Z" } }, "id": "5671b41a8291920", "execution_count": 15 }, { "cell_type": "code", "outputs": [], "source": [], "metadata": { "collapsed": false }, "id": "5eb3263bc83c44da" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }