测试代码

2021-01-05 21:30:25 +08:00 · 2021-01-05 21:30:25 +08:00 · 07274909e2
commit 07274909e2
parent 3649a05348
2 changed files with 121 additions and 0 deletions
--- a/numerical_analysis/8/main2.py
+++ b/numerical_analysis/8/main2.py
@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn import preprocessing
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Load the data
+dataset_url = r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'
+data = pd.read_csv(dataset_url, sep=';')
+print(data)
+# print(data.describe())
+# Split the data into a training set and a test set, stratified on quality
+y = data.quality
+X = data.drop('quality', axis=1)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.1,
+                                                    random_state=123,
+                                                    stratify=y)
+# Declare the preprocessing + model pipeline
+pipeline = make_pipeline(preprocessing.StandardScaler(),
+                         RandomForestRegressor(n_estimators=100))
+# Declare the hyperparameter grid.
+# 1.0 (consider every feature) replaces the former 'auto' option: for a forest
+# regressor 'auto' meant "all features", and it was deprecated in
+# scikit-learn 1.1 and removed in 1.3.
+hyperparameters = {'randomforestregressor__max_features': [1.0, 'sqrt', 'log2'],
+                   'randomforestregressor__max_depth': [None, 5, 3, 1]}
+# Tune the model with a 10-fold cross-validated grid search
+clf = GridSearchCV(pipeline, hyperparameters, cv=10)
+clf.fit(X_train, y_train)
+# Predict on the held-out test set and report R^2 and MSE
+pred = clf.predict(X_test)
+print(r2_score(y_test, pred))
+print(mean_squared_error(y_test, pred))
--- a/numerical_analysis/8/main4.py
+++ b/numerical_analysis/8/main4.py
@ -0,0 +1,86 @@
+# -*- coding=utf-8 -*-
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import mean_squared_error, r2_score
+
+
+def normalize(x):
+    """Standardize ``x`` to zero mean and unit standard deviation.
+
+    The mean and standard deviation are taken over every element of ``x``
+    (no axis is given), so pass one feature column at a time.
+
+    Args:
+        x: Array-like of numeric values.
+
+    Returns:
+        ``(x - mean) / std``; all zeros when the standard deviation is 0.
+    """
+    mean = np.mean(x)
+    std = np.std(x)
+    centered = x - mean
+    if std == 0:
+        # A constant input has no spread; dividing would produce NaN (0 / 0).
+        return np.zeros_like(centered)
+    return centered / std
+
+
+def append_bias_reshape(features, labels):
+    """Prepend a column of ones to ``features`` and turn ``labels`` into a column.
+
+    Args:
+        features: 2-D array with one row per sample.
+        labels: Array holding one value per sample.
+
+    Returns:
+        A tuple ``(x, y)``: ``x`` has shape ``(m, n + 1)`` with the ones
+        column first, and ``y`` has shape ``(m, 1)``.
+    """
+    rows, cols = features.shape[0], features.shape[1]
+    bias = np.ones(rows)
+    design = np.c_[bias, features].reshape([rows, cols + 1])
+    targets = np.reshape(labels, [rows, 1])
+    return design, targets
+
+
+def load_data(path=r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'):
+    """Load the semicolon-separated wine CSV into feature and label arrays.
+
+    Args:
+        path: Location of the CSV file. Defaults to the original hard-coded
+            path, so existing ``load_data()`` calls keep working.
+
+    Returns:
+        A ``(features, labels)`` tuple of float64 arrays: the first 11
+        columns, and the remaining columns (kept 2-D).
+    """
+    with open(path, encoding='utf-8') as f:
+        # skiprows=1 skips the first line (the column names); ndmin=2 keeps a
+        # file with a single data row 2-D so the column slicing below works.
+        data = np.loadtxt(f, delimiter=";", skiprows=1, ndmin=2).astype('float64')
+    return data[:, :11], data[:, 11:]
+
+
+x_all, y_all = load_data()
+# The first 1400 rows are used for training, the remaining rows for testing.
+y_train = y_all[:1400]
+y_test = y_all[1400:]
+# Standardize every feature column with statistics from the training rows
+# only, then apply the same shift/scale to the test rows so the model is
+# evaluated on features in the scale it was trained on.
+train_mean = np.mean(x_all[:1400, :11], axis=0)
+train_std = np.std(x_all[:1400, :11], axis=0)
+train_std[train_std == 0] = 1.0  # avoid dividing by zero for constant columns
+x_train = (x_all[:1400, :11] - train_mean) / train_std
+x_test = (x_all[1400:, :11] - train_mean) / train_std
+
+# x_train, y_train = append_bias_reshape(x_train, y_train)
+# dataset = tfdata.Dataset.from_tensor_slices((x_train, y_train))
+
+print(x_train.dtype)
+# Model parameters: an 11x1 weight matrix and a 1x1 bias, randomly initialized.
+W = tf.Variable(np.random.random([11, 1]))
+b = tf.Variable(np.random.random([1, 1]))
+
+
+def linear_regression(x):
+    """Return the linear model output ``x @ W + b``.
+
+    Reads the module-level variables ``W`` and ``b``.
+    """
+    weighted = tf.matmul(x, W)
+    return weighted + b
+
+
+# Halved mean squared error
+def mean_square(y_pred, y_true):
+    """Return the halved mean squared error ``sum((y_pred - y_true)^2) / (2 * m)``.
+
+    ``m`` is the number of rows in ``y_pred`` instead of a fixed 1400, so the
+    loss stays correctly scaled for a batch of any size.
+
+    Args:
+        y_pred: Predicted values, one row per sample.
+        y_true: Target values with a shape compatible with ``y_pred``.
+    """
+    n_samples = y_pred.shape[0]
+    return tf.reduce_sum(tf.pow(y_pred - y_true, 2)) / (2 * n_samples)
+
+
+# SGD optimizer with a learning rate of 0.01
+optimizer = tf.optimizers.SGD(learning_rate=0.01)
+
+
+# Optimization step
+def run_optimization():
+    """Run one gradient update of ``W`` and ``b`` on the full training set."""
+    params = [W, b]
+    # Record the forward pass inside a GradientTape for automatic differentiation
+    with tf.GradientTape() as tape:
+        loss = mean_square(linear_regression(x_train), y_train)
+
+    # Gradients of the loss with respect to W and b
+    grads = tape.gradient(loss, params)
+
+    # Update W and b using those gradients
+    optimizer.apply_gradients(zip(grads, params))
+
+
+training_steps = 1000
+display_step = 50
+
+# Train for the given number of steps
+for step in range(1, training_steps + 1):
+    # Run one optimization step to update W and b
+    run_optimization()
+
+    if step % display_step == 0:
+        pred = linear_regression(x_train)
+        loss = mean_square(pred, y_train)
+        print("step: %i, loss: %f, W: %s, b: %s" % (step, loss, W.numpy(), b.numpy()))
+
+# Round the test-set predictions to the nearest integer before scoring them
+result = linear_regression(x_test).numpy().round()
+delta = result - y_test
+# scikit-learn metrics take their arguments as (y_true, y_pred)
+print(f"根均方误差(RMSE)：{np.sqrt(mean_squared_error(y_test, result))}")
+print(f"测试集R^2：{r2_score(y_test, result)}")