2021-01-05 21:30:25 +08:00

87 lines
2.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding=utf-8 -*-
import numpy as np
import tensorflow as tf
from sklearn.metrics import mean_squared_error, r2_score
def normalize(x):
    """Standardize *x* to zero mean and unit standard deviation.

    The mean and (population) standard deviation are computed over all
    elements of *x*.

    Args:
        x: Array-like of numbers.

    Returns:
        ``(x - mean) / std``. When the standard deviation is exactly zero
        (constant input or a single sample) the centred values ``x - mean``
        are returned instead, so the result is all zeros rather than NaN/inf.
    """
    mean = np.mean(x)
    std = np.std(x)
    centered = x - mean
    # A constant column carries no information; dividing by a zero std would
    # produce NaN and poison every later computation.
    if std == 0:
        return centered
    return centered / std
def append_bias_reshape(features, labels):
    """Prepend a column of ones to *features* and make *labels* a column.

    Args:
        features: 2-D array with one row per sample.
        labels: Array-like holding one value per sample.

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape ``(rows, cols + 1)`` with the
        bias column of ones first, and ``y`` has shape ``(rows, 1)``.
    """
    sample_count = features.shape[0]
    feature_count = features.shape[1]
    bias_column = np.ones(sample_count)
    design = np.c_[bias_column, features].reshape(sample_count, feature_count + 1)
    targets = np.reshape(labels, (sample_count, 1))
    return design, targets
def load_data(path=r'/Users/light/workspace/courses/numerical_analysis/8/wine.csv'):
    """Load the semicolon-separated data file and split features from labels.

    The first line of the file is skipped as a header. Every remaining line
    is parsed as floating-point numbers.

    Args:
        path: Location of the CSV file. Defaults to the path the script was
            originally written against, so ``load_data()`` keeps working.

    Returns:
        A tuple ``(features, labels)``: the first 11 columns, and all
        remaining columns (from column index 11 on), both as 2-D float64
        arrays.
    """
    with open(path, encoding='utf-8') as f:
        # ndmin=2 keeps the result 2-D even when the file has one data row,
        # so the column slicing below never fails on a 1-D array.
        data = np.loadtxt(f, delimiter=";", skiprows=1, dtype=np.float64, ndmin=2)
    return data[:, :11], data[:, 11:]
# Load the whole data set as (features, labels).
x_all, y_all = load_data()

# The first 1400 rows are the training split; the remaining rows are the test split.
y_train = y_all[:1400]
y_test = y_all[1400:]

# Standardize each feature column with the mean/std of the TRAINING rows and
# apply the same transform to the test rows. The model is fitted on
# standardized inputs, so the test features must be on the same scale;
# previously x_test was left unscaled, which invalidated the test metrics.
train_mean = np.mean(x_all[:1400], axis=0)
train_std = np.std(x_all[:1400], axis=0)
x_train = (x_all[:1400] - train_mean) / train_std
x_test = (x_all[1400:, :11] - train_mean) / train_std

# x_train, y_train = append_bias_reshape(x_train, y_train)
# dataset = tfdata.Dataset.from_tensor_slices((x_train, y_train))
print(x_train.dtype)

# Trainable parameters, randomly initialized: an 11x1 weight matrix and a
# 1x1 bias.
W = tf.Variable(np.random.random([11, 1]))
b = tf.Variable(np.random.random([1, 1]))
def linear_regression(x):
    """Return the linear model output ``x @ W + b`` for the input rows *x*.

    Uses the module-level variables ``W`` and ``b``.
    """
    weighted = tf.matmul(x, W)
    return weighted + b
# Mean squared error (halved)
def mean_square(y_pred, y_true):
    """Return the sum of squared errors divided by twice the sample count.

    Args:
        y_pred: Predicted values.
        y_true: Target values; its first axis is the sample axis.

    Returns:
        Scalar tensor ``sum((y_pred - y_true) ** 2) / (2 * n)`` where ``n`` is
        the number of rows in *y_true*.
    """
    squared_error_sum = tf.reduce_sum(tf.pow(y_pred - y_true, 2))
    # Take the sample count from the data instead of hard-coding the training
    # set size (1400), so the loss is also correct for other batch sizes.
    sample_count = tf.cast(tf.shape(y_true)[0], squared_error_sum.dtype)
    return squared_error_sum / (2 * sample_count)
# Stochastic gradient descent optimizer with a learning rate of 0.01.
optimizer = tf.optimizers.SGD(learning_rate=0.01)
# Optimization step
def run_optimization():
    """Apply one optimizer update to ``W`` and ``b`` using the training data."""
    params = [W, b]
    # Run the forward pass inside a GradientTape so it can be differentiated.
    with tf.GradientTape() as tape:
        loss = mean_square(linear_regression(x_train), y_train)
    # Gradients of the loss with respect to each parameter, in params order.
    grads = tape.gradient(loss, params)
    # Update W and b according to the gradients.
    optimizer.apply_gradients(zip(grads, params))
training_steps = 1000
display_step = 50

# Train for the given number of steps.
for step in range(1, training_steps + 1):
    # Run one optimization step to update W and b.
    run_optimization()

    # Report the current training loss and parameters every display_step steps.
    if step % display_step == 0:
        pred = linear_regression(x_train)
        loss = mean_square(pred, y_train)
        print("step: %i, loss: %f, W: %s, b: %s" % (step, loss, W.numpy(), b.numpy()))

# Predict on the test split and round each prediction to the nearest integer.
result = linear_regression(x_test).numpy().round()
# Per-sample prediction error; not used below.
delta = result - y_test
# sklearn metrics take (y_true, y_pred); keep that order for both calls.
print(f"根均方误差(RMSE){np.sqrt(mean_squared_error(y_test, result))}")
print(f"测试集R^2{r2_score(y_test, result)}")