测试代码
This commit is contained in:
parent
3649a05348
commit
07274909e2
35
numerical_analysis/8/main2.py
Normal file
35
numerical_analysis/8/main2.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn import preprocessing
|
||||||
|
from sklearn.ensemble import RandomForestRegressor
|
||||||
|
from sklearn.pipeline import make_pipeline
|
||||||
|
from sklearn.model_selection import GridSearchCV
|
||||||
|
from sklearn.metrics import mean_squared_error, r2_score
|
||||||
|
|
||||||
|
# Load the data set: a semicolon-separated CSV with a `quality` column.
dataset_url = r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'
data = pd.read_csv(dataset_url, sep=';')
print(data)
# print(data.describe())

# Separate the target (`quality`) from the features, then hold out 10% of the
# rows as a test set. `stratify=y` keeps the distribution of quality values
# the same in both splits; `random_state` makes the split reproducible.
y = data.quality
X = data.drop('quality', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.1,
                                                    random_state=123,
                                                    stratify=y)

# Pre-processing + model: standardize the features, then fit a random forest.
pipeline = make_pipeline(preprocessing.StandardScaler(),
                         RandomForestRegressor(n_estimators=100))

# Hyper-parameter grid. Keys follow the `<step name>__<parameter>` convention
# of make_pipeline.
# NOTE: the string 'auto' for max_features was deprecated in scikit-learn 1.1
# and removed in 1.3. For a regressor it meant "use all features", which is
# spelled 1.0 here so the grid works on both old and new releases.
hyperparameters = {'randomforestregressor__max_features': [1.0, 'sqrt', 'log2'],
                   'randomforestregressor__max_depth': [None, 5, 3, 1]}

# Tune the pipeline with 10-fold cross-validated grid search.
clf = GridSearchCV(pipeline, hyperparameters, cv=10)
clf.fit(X_train, y_train)

# Evaluate the tuned model on the held-out test set.
pred = clf.predict(X_test)
print(r2_score(y_test, pred))
print(mean_squared_error(y_test, pred))
|
||||||
86
numerical_analysis/8/main4.py
Normal file
86
numerical_analysis/8/main4.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
# -*- coding=utf-8 -*-
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
|
from sklearn.metrics import mean_squared_error, r2_score
|
||||||
|
|
||||||
|
|
||||||
|
def normalize(x):
    """Standardize ``x`` to zero mean and unit standard deviation.

    The mean and standard deviation are computed over *all* elements of
    ``x`` (no axis is given), so pass one feature column at a time when
    per-feature scaling is wanted.

    Args:
        x: Array-like of numbers.

    Returns:
        ``(x - mean) / std``. When ``std`` is exactly zero (every element is
        identical) the centred values, which are all zero, are returned
        instead of dividing by zero and producing NaN.
    """
    mean = np.mean(x)
    std = np.std(x)
    centered = x - mean
    if std == 0:
        # Constant input: avoid 0/0 -> NaN poisoning downstream computations.
        return centered
    return centered / std
|
||||||
|
|
||||||
|
|
||||||
|
def append_bias_reshape(features, labels):
    """Prepend a bias column of ones to ``features`` and make ``labels`` a column.

    Args:
        features: 2-D array; ``shape[0]`` is the sample count and ``shape[1]``
            the feature count.
        labels: Array-like holding one value per sample.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(m, n + 1)`` with ones in
        its first column and ``y`` has shape ``(m, 1)``.
    """
    n_samples = features.shape[0]
    n_features = features.shape[1]

    bias_column = np.ones((n_samples, 1))
    design = np.hstack((bias_column, features)).reshape(n_samples, n_features + 1)
    targets = np.reshape(labels, (n_samples, 1))
    return design, targets
|
||||||
|
|
||||||
|
|
||||||
|
def load_data(path=r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'):
    """Load the semicolon-separated data file and split it into features and labels.

    Args:
        path: Location of the CSV file. Defaults to the original hard-coded
            location so existing zero-argument calls keep working.

    Returns:
        A tuple ``(features, labels)`` of float64 arrays: the first 11 columns
        of the file, and every column from index 11 onwards (kept 2-D).
    """
    with open(path, encoding='utf-8') as f:
        # skiprows=1 drops the header line; ndmin=2 keeps the result 2-D even
        # when the file holds a single data row, so the slicing below is valid.
        data = np.loadtxt(f, delimiter=";", skiprows=1, dtype='float64', ndmin=2)
    return data[:, :11], data[:, 11:]
|
||||||
|
|
||||||
|
|
||||||
|
# Load everything, then split by position: the first 1400 rows are used for
# training and the remaining rows for testing.
x_all, y_all = load_data()
n_train = 1400
y_train = y_all[:n_train]
y_test = y_all[n_train:]

# Standardize each feature column with the mean/std of the TRAINING rows and
# apply the same transform to the test rows. The test features must be on the
# same scale as the training features, otherwise the learned weights are
# applied to inputs with a different scale and the predictions are meaningless.
train_mean = x_all[:n_train].mean(axis=0)
train_std = x_all[:n_train].std(axis=0)
train_std[train_std == 0] = 1.0  # constant column: avoid division by zero
x_train = (x_all[:n_train] - train_mean) / train_std
x_test = (x_all[n_train:, :11] - train_mean) / train_std

# x_train, y_train = append_bias_reshape(x_train, y_train)
# dataset = tfdata.Dataset.from_tensor_slices((x_train, y_train))

print(x_train.dtype)

# Trainable parameters: an 11x1 weight matrix and a 1x1 bias, both initialised
# from np.random.random (uniform on [0, 1)).
W = tf.Variable(np.random.random([11, 1]))
b = tf.Variable(np.random.random([1, 1]))
|
||||||
|
|
||||||
|
|
||||||
|
def linear_regression(x):
    """Return the linear model output ``x @ W + b``.

    Uses the module-level variables ``W`` (weights) and ``b`` (bias).
    """
    weighted = tf.matmul(x, W)
    return weighted + b
|
||||||
|
|
||||||
|
|
||||||
|
# Mean squared error (halved).
def mean_square(y_pred, y_true):
    """Return the halved mean squared error ``sum((y_pred - y_true)^2) / (2 * n)``.

    Args:
        y_pred: Predicted values.
        y_true: Ground-truth values; its first dimension is taken as the
            sample count ``n`` (assumes ``y_true`` exposes ``.shape``, as
            NumPy arrays and TensorFlow tensors do).

    Returns:
        A scalar tensor holding the loss.
    """
    # Derive n from the data instead of hard-coding the training-set size, so
    # the loss stays correctly scaled for any number of samples.
    n_samples = y_true.shape[0]
    return tf.reduce_sum(tf.pow(y_pred - y_true, 2)) / (2 * n_samples)
|
||||||
|
|
||||||
|
|
||||||
|
# Plain stochastic gradient descent with a fixed learning rate of 0.01.
optimizer = tf.optimizers.SGD(learning_rate=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# Optimization step.
def run_optimization():
    """Run one gradient-descent update of ``W`` and ``b`` on the training data.

    Reads the module-level ``x_train``, ``y_train``, ``W``, ``b`` and
    ``optimizer``; updates ``W`` and ``b`` in place and returns nothing.
    """
    trainable = [W, b]

    # Record the forward pass on a tape so the loss can be differentiated
    # automatically.
    with tf.GradientTape() as tape:
        loss = mean_square(linear_regression(x_train), y_train)

    # Gradients of the loss with respect to each trainable variable.
    grads = tape.gradient(loss, trainable)

    # Apply the update to W and b.
    optimizer.apply_gradients(zip(grads, trainable))
|
||||||
|
|
||||||
|
|
||||||
|
training_steps = 1000
display_step = 50

# Train for the configured number of steps, reporting progress periodically.
for step in range(1, training_steps + 1):
    # One update of W and b.
    run_optimization()

    # Only report every `display_step` steps.
    if step % display_step != 0:
        continue

    pred = linear_regression(x_train)
    loss = mean_square(pred, y_train)
    print("step: %i, loss: %f, W: %s, b: %s" % (step, loss, W.numpy(), b.numpy()))
|
||||||
|
|
||||||
|
# Evaluate on the test rows. Predictions are rounded to the nearest integer
# before scoring.
result = linear_regression(x_test).numpy().round()

# Both metrics take (y_true, y_pred) in that order.
print(f"根均方误差(RMSE):{np.sqrt(mean_squared_error(y_test, result))}")
print(f"测试集R^2:{r2_score(y_test, result)}")
|
||||||
Loading…
x
Reference in New Issue
Block a user