From 07274909e223a92145da69b09009ce29fd43fd0d Mon Sep 17 00:00:00 2001
From: veypi
Date: Tue, 5 Jan 2021 21:30:25 +0800
Subject: [PATCH] Test code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 numerical_analysis/8/main2.py | 35 ++++++++++++++
 numerical_analysis/8/main4.py | 86 +++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 numerical_analysis/8/main2.py
 create mode 100644 numerical_analysis/8/main4.py

diff --git a/numerical_analysis/8/main2.py b/numerical_analysis/8/main2.py
new file mode 100644
index 0000000..881dc47
--- /dev/null
+++ b/numerical_analysis/8/main2.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn import preprocessing
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import GridSearchCV
+from sklearn.metrics import mean_squared_error, r2_score
+
+# Load the data
+dataset_url = r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'
+data = pd.read_csv(dataset_url, sep=';')
+print(data)
+# print(data.describe())
+# Split the data into training and test sets
+y = data.quality
+X = data.drop('quality', axis=1)
+X_train, X_test, y_train, y_test = train_test_split(X, y,
+                                                    test_size=0.1,
+                                                    random_state=123,
+                                                    stratify=y)
+# Declare the preprocessing steps
+pipeline = make_pipeline(preprocessing.StandardScaler(),
+                         RandomForestRegressor(n_estimators=100))
+# Declare the hyperparameter grid
+hyperparameters = {'randomforestregressor__max_features': ['auto', 'sqrt', 'log2'],
+                   'randomforestregressor__max_depth': [None, 5, 3, 1]}
+# Tune the model with cross-validated grid search
+clf = GridSearchCV(pipeline, hyperparameters, cv=10)
+clf.fit(X_train, y_train)
+# Evaluate the model on the held-out test set
+pred = clf.predict(X_test)
+print("R^2:", r2_score(y_test, pred))
+print("MSE:", mean_squared_error(y_test, pred))
diff --git a/numerical_analysis/8/main4.py b/numerical_analysis/8/main4.py
new file mode 100644
index 0000000..8b79b2d
--- /dev/null
+++ b/numerical_analysis/8/main4.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import tensorflow as tf
+from sklearn.metrics import mean_squared_error, r2_score
+
+
+def normalize(x):
+    mean = np.mean(x)
+    std = np.std(x)
+    return (x - mean) / std
+
+
+def append_bias_reshape(features, labels):
+    m = features.shape[0]
+    n = features.shape[1]
+    x = np.reshape(np.c_[np.ones(m), features], [m, n + 1])
+    y = np.reshape(labels, [m, 1])
+    return x, y
+
+
+def load_data():
+    p = r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'
+    with open(p, encoding='utf-8') as f:
+        data = np.loadtxt(f, delimiter=";", skiprows=1).astype('float64')
+    return data[:, :11], data[:, 11:]
+
+
+x_all, y_all = load_data()
+y_train, y_test = y_all[:1400], y_all[1400:]
+# Standardize each feature column; the test rows must be scaled as well
+x_train = normalize(x_all[:1400, 0:1])
+x_test = normalize(x_all[1400:, 0:1])
+for i in range(1, 11):
+    x_train = np.concatenate((x_train, normalize(x_all[:1400, i:i + 1])), 1)
+    x_test = np.concatenate((x_test, normalize(x_all[1400:, i:i + 1])), 1)
+
+# x_train, y_train = append_bias_reshape(x_train, y_train)
+# dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+
+print(x_train.dtype)
+W = tf.Variable(np.random.random([11, 1]))
+b = tf.Variable(np.random.random([1, 1]))
+
+
+def linear_regression(x):
+    return tf.matmul(x, W) + b
+
+
+# Mean squared error (halved, over the 1400 training samples)
+def mean_square(y_pred, y_true):
+    return tf.reduce_sum(tf.pow(y_pred - y_true, 2)) / (2 * 1400)
+
+
+optimizer = tf.optimizers.SGD(0.01)
+
+
+# One optimization step
+def run_optimization():
+    # Wrap the computation in a GradientTape for automatic differentiation
+    with tf.GradientTape() as g:
+        pred = linear_regression(x_train)
+        loss = mean_square(pred, y_train)
+
+    # Compute the gradients
+    gradients = g.gradient(loss, [W, b])
+
+    # Update W and b following the gradients
+    optimizer.apply_gradients(zip(gradients, [W, b]))
+
+
+training_steps = 1000
+display_step = 50
+
+# Train for the given number of steps
+for step in range(1, training_steps + 1):
+    # Run one optimization step to update W and b
+    run_optimization()
+
+    if step % display_step == 0:
+        pred = linear_regression(x_train)
+        loss = mean_square(pred, y_train)
+        print("step: %i, loss: %f, W: %s, b: %s" % (step, loss, W.numpy(), b.numpy()))
+
+result = linear_regression(x_test).numpy().round()
+print(f"RMSE: {np.sqrt(mean_squared_error(result, y_test))}")
+print(f"Test-set R^2: {r2_score(y_test, result)}")