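# NOTE: stray editor UI label "编辑代码" ("Edit code") left over from a copy-paste; kept as a comment so it is not executed as a bare name.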



# Iris classification with a k-nearest-neighbours classifier.
# The names below are imported here because nothing earlier in this script
# brings them into scope.
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Load the iris dataset: feature matrix and class labels
iris = datasets.load_iris()
x = iris.data
y = iris.target

# Hold out 30% of the samples for testing; the fixed seed makes the split
# (and therefore the printed output) reproducible from run to run
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

# Create the KNN classifier with its default settings and fit it
knn = KNeighborsClassifier()
knn.fit(x_train, y_train)

# Print the predicted labels, then the true labels, for a side-by-side check
print(knn.predict(x_test))
print(y_test)








# Gaussian naive Bayes on two tumour measurements.
# The names below are imported here because nothing earlier in this script
# brings them into scope.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Assumes df is an already-loaded DataFrame that contains these columns
x = df[['最大周长', '最大凹陷度']]  # double brackets select a 2-D DataFrame, not a Series
y = df['肿瘤性质']

# Split into training and test sets.
# NOTE(review): test_size=0.7 leaves only 30% of the rows for training —
# confirm this is intended (the other sections hold out 0.2-0.3).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.7, random_state=42)

# Create and train the model
model = GaussianNB()
model.fit(x_train, y_train)

# Predict on the held-out rows and show the predicted labels
y_test_pred = model.predict(x_test)
print(y_test_pred)

# accuracy_score takes (y_true, y_pred), in that order
accuracy = accuracy_score(y_test, y_test_pred)
print("accuracy:", accuracy)



# Gaussian naive Bayes: predict whether play is possible from the weather columns.
# The names below are imported here because nothing earlier in this script
# brings them into scope.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Assumes df is an already-loaded DataFrame that contains these columns.
# NOTE(review): GaussianNB requires numeric features — confirm these columns
# have been encoded as numbers before this point.
x = df[['天气', '温度', '湿度']]
y = df['可否打球']

# Hold out 30% of the rows for testing; fixed seed for a reproducible split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Create and train the model
model = GaussianNB()
model.fit(x_train, y_train)

# Predict on the held-out rows and report the accuracy
y_test_pred = model.predict(x_test)
print("y_test_pred:", y_test_pred)

accuracy = accuracy_score(y_test, y_test_pred)
print("accuracy:", accuracy)



















# Logistic regression on a single feature column.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Feature and target columns; presumably `data` is a DataFrame loaded
# earlier — it is not defined in this part of the script.
x, y = data['预估薪资'], data['是否会购买']

# Keep 20% of the rows for testing, with a fixed seed for a repeatable split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=2020, test_size=0.2
)

# sklearn expects a 2-D feature matrix, so turn each 1-D feature Series
# into an (n_samples, 1) column array
x_train, x_test = (part.values.reshape(-1, 1) for part in (x_train, x_test))

# Fit the classifier on the training portion
model = LogisticRegression()
model.fit(x_train, y_train)

# Accuracy on the test set, plus the per-row predictions
acc = model.score(x_test, y_test)
y_test_pred = model.predict(x_test)

print(acc)
print(y_test_pred)