import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import column_or_1d
# Read the data
# Load the raw records.
# header=None stops pandas from consuming the first record as column names.
# NOTE(review): krkopt.data appears to ship without a header row (the
# previously captured shape was one row short of the documented 28056
# records) - confirm against the actual file.
original_data = pd.read_csv("krkopt.data", header=None)
# Add column headers and format the data
# Name the seven columns: six position features followed by the label.
original_data.columns = ["wx", "wy", "wwx", "wwy", "vx", "vy", "outcome"]

# Encode the letters 'a'..'h' as the integers 1..8 in every non-numeric
# feature column. An explicit mapping replaces the earlier order-dependent
# regex replacement. Any value outside 'a'..'h' becomes NaN rather than being
# silently turned into 0, so malformed rows are visible.
letter_to_number = {
    letter: number for number, letter in enumerate("abcdefgh", start=1)
}
for column in original_data.columns.drop("outcome"):
    if not pd.api.types.is_numeric_dtype(original_data[column]):
        original_data[column] = original_data[column].map(letter_to_number)

# Binary label: 1 for "draw", 0 for every other outcome.
original_data["outcome"] = (original_data["outcome"] == "draw").astype(int)

# Preview the first rows (head must be called, not merely referenced).
original_data.head()
<bound method NDFrame.head of wx wy wwx wwy vx vy outcome0 1 1 3 1 3 2 11 1 1 3 1 4 1 12 1 1 3 1 4 2 13 1 1 3 2 3 1 14 1 1 3 2 3 3 1... .. .. ... ... .. .. ...28050 2 1 7 7 5 5 028051 2 1 7 7 5 6 028052 2 1 7 7 5 7 028053 2 1 7 7 6 5 028054 2 1 7 7 7 5 0[28055 rows x 7 columns]>
# Normalize the data
# Standardize the six position features column-wise with sklearn's
# preprocessing.scale, overwriting the original values in place.
feature_columns = ['wx', 'wy', 'wwx', 'wwy', 'vx', 'vy']
original_data[feature_columns] = preprocessing.scale(
    original_data[feature_columns]
)

# Persist the processed table. The row index is written too (to_csv default),
# so the file carries an extra unnamed leading column.
original_data.to_csv("krkopt_fill.csv")

# Report (rows, columns) of the processed table.
original_data.shape
(28055, 7)
# Split the data into inputs and outputs
# Reload the processed table that was saved to disk.
new_original_data = pd.read_csv("krkopt_fill.csv")

# Inputs: the six position columns. Output: the single label column, kept as
# a one-column DataFrame (double brackets) rather than a Series.
input_columns = ['wx', 'wy', 'wwx', 'wwy', 'vx', 'vy']
output_columns = ['outcome']
original_data_x = new_original_data[input_columns]
original_data_y = new_original_data[output_columns]

# Preview the first five rows of each part.
original_data_x.head(5)
original_data_y.head(5)
.dataframe tbody tr th:only-of-type { vertical-align: middle; }
.dataframe tbody tr th { vertical-align: top;}.dataframe thead th { text-align: right;}