# Split the DataFrame into the feature matrix x and the target vector y.
# The column labelled '111' is the target; every other column is a feature.
x = df.drop(columns=['111'])
y = df['111']
# Split into training and test sets: 30% of the rows are held out for
# testing, and random_state pins the shuffle so the split is reproducible.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)
# Candidate classifiers, all fitted on the same training split.
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

# Keep every fitted classifier under a short name instead of rebinding one
# variable five times, which would throw away all but the last model.
models = {
    'naive_bayes': GaussianNB(),  # Gaussian variant of naive Bayes
    'logistic_regression': LogisticRegression(),
    'random_forest': RandomForestClassifier(),
    'decision_tree': DecisionTreeClassifier(),
    'knn': KNeighborsClassifier(),
}
for clf in models.values():
    clf.fit(x_train, y_train)

# `model` stays bound to the k-nearest-neighbours classifier, which is what
# the sequential reassignments used to leave it as.
model = models['knn']
# Model prediction and evaluation: predict labels for the held-out test
# features and print them.
y_test_pred = model.predict(x_test)
print(y_test_pred)

# Score the model on the test split and print the result. For scikit-learn
# classifiers, `score` returns the mean accuracy (it is not precision).
accuracy = model.score(x_test, y_test)
print(accuracy)
# Quick reference of inspection / cleaning calls.
# NOTE(review): assumes `data` is a pandas DataFrame -- confirm.

# --- Inspection ---
data.info()   # must be called; the bare attribute `data.info` does nothing
data.shape    # (n_rows, n_columns)
data.dtypes
data.head()
data.tail()

# --- Missing values ---
# These calls return new objects; `data` itself is left unchanged.
data.isnull()
data.dropna()         # drop rows that contain missing values
data.dropna(axis=1)   # drop columns that contain missing values
data.fillna(0)        # replace missing values with 0

# --- Duplicates ---
data.duplicated()
# The comma between the labels matters: 'aa''bb' is implicit string
# concatenation and would be read as the single label 'aabb'.
data.duplicated(['aa', 'bb'])
data.drop_duplicates()
data.drop_duplicates(['aa', 'bb'])