场景描述:基于物联网检测设备大量采集流量数据和液位高度数据,并存储在时序数据库(InfluxDB)中;然后采用下面的代码对数据进行预测。
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Author:李智敏
# Wechat:anark919
# Date:2024-01-05 10:17
# Title:
import os.path
import uuid
from typing import Optional

import pandas as pd


class Neural_networks():
    """Small dense regression network that is trained and reused from disk.

    Every artifact (training CSV, ``.h5`` model, plots, prediction
    spreadsheets) is written to ``<model_dir>/<model_name>/``.
    """

    def __init__(self, para_fields: list, outcome_fields: list, model_dir: str,
                 model_name: Optional[str] = None):
        """Remember the column layout and make sure the model directory exists.

        :param para_fields: names of the independent-variable (input) columns
        :param outcome_fields: names of the dependent-variable (output) columns
        :param model_dir: parent directory under which the model folder is created
        :param model_name: model name; when omitted a fresh uuid4 hex string is
            generated for this instance
        """
        # The name must be generated per call: a ``uuid.uuid4().hex`` default in
        # the signature is evaluated once, so all unnamed instances would share it.
        if model_name is None:
            model_name = uuid.uuid4().hex
        self.para_fields = para_fields
        self.outcome_fields = outcome_fields
        self.model_name = model_name
        self.model_dir = os.path.join(model_dir, model_name)
        os.makedirs(self.model_dir, exist_ok=True)

    def _path(self, filename: str) -> str:
        """Return the path of ``filename`` inside this model's directory."""
        return os.path.join(self.model_dir, filename)

    def _fit_scalers(self, df: pd.DataFrame):
        """Fit [-1, 1] min-max scalers on the input and output columns of ``df``."""
        from sklearn.preprocessing import MinMaxScaler

        x_scaler = MinMaxScaler(feature_range=(-1, 1))
        y_scaler = MinMaxScaler(feature_range=(-1, 1))
        x_scaler.fit(df[self.para_fields].values)
        y_scaler.fit(df[self.outcome_fields].values)
        return x_scaler, y_scaler

    def training(self, data: pd.DataFrame, epochs=50000):
        """Train the network on ``data`` and persist the model and diagnostics.

        Writes the training data as CSV (``forecast`` re-reads it to rebuild the
        scalers), the trained model as ``.h5``, a loss-curve image, the
        predictions on the training set as ``.xlsx`` and an actual-vs-predicted
        scatter image. Both figures are also shown with ``plt.show()``.

        :param data: frame containing all ``para_fields`` and ``outcome_fields``
        :param epochs: number of training epochs
        :return: ``True`` once everything has been written
        """
        # Heavy third-party imports stay local so importing this module is cheap.
        import matplotlib.pyplot as plt
        from keras import regularizers
        from keras.layers import Dense
        from keras.models import Sequential
        from keras.optimizers import Adam

        # Persist the training data; forecast() needs it to refit the scalers.
        df = data
        df.to_csv(self._path(f"train_{self.model_name}.csv"), index=False)

        # Normalise inputs and outputs to [-1, 1].
        x_scaler, y_scaler = self._fit_scalers(df)
        x = x_scaler.transform(df[self.para_fields].values)
        y = y_scaler.transform(df[self.outcome_fields].values)

        # Two L2-regularised hidden layers and one linear unit per outcome column.
        model = Sequential()
        model.add(Dense(10, activation='relu',
                        input_shape=(len(self.para_fields),),
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(16, activation='relu',
                        kernel_regularizer=regularizers.l2(0.01)))
        model.add(Dense(len(self.outcome_fields), activation='linear'))

        # ``learning_rate`` replaces the deprecated ``lr`` keyword.
        optimizer = Adam(learning_rate=0.0001)
        model.compile(optimizer=optimizer, loss='mse')

        history = model.fit(x, y, epochs=epochs, batch_size=67)

        # The loss is evaluated on the same data the model was fitted on.
        mse = model.evaluate(x, y)
        print('Validation MSE:', mse)

        model.save(self._path(f"{self.model_name}.h5"))

        # Font settings so Chinese labels and the minus sign render correctly.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False

        # Loss curve, on its own figure.
        plt.figure()
        plt.plot(history.history['loss'])
        plt.title("模型误差")
        plt.ylabel("误差")
        plt.xlabel("循环次数")
        plt.savefig(self._path(f"误差曲线_{self.model_name}.png"))
        plt.show()

        # Predict on the training inputs and map both series back to real units.
        y_pred = model.predict(x)
        y = y_scaler.inverse_transform(y)
        y_pred = y_scaler.inverse_transform(y_pred)
        print("the prediction is:", y_pred)

        df_out = pd.DataFrame(y_pred, columns=self.outcome_fields)
        df_out.to_excel(self._path(f"prediction_train_{self.model_name}.xlsx"),
                        index=False)

        # Actual vs. predicted scatter with the y = x reference line. A new
        # figure keeps it from being drawn on top of the loss curve when the
        # backend does not discard the previous figure on show().
        plt.figure()
        plt.scatter(y, y_pred)
        plt.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
        plt.xlabel("实际值")
        plt.ylabel("预测值")
        plt.savefig(self._path(f"对比图_{self.model_name}.png"))
        plt.show()
        return True

    def forecast(self, data: pd.DataFrame):
        """Predict the outcome columns for ``data`` with the saved model.

        The scalers are rebuilt from the training CSV written by ``training``,
        so ``training`` must have been run for this ``model_name`` beforehand.
        The input rows joined with their predictions are also written to
        ``prediction_<model_name>.xlsx``.

        :param data: frame containing all ``para_fields`` columns
        :return: view of per-row dicts (input columns plus predicted columns)
        """
        from keras.models import load_model

        # Rebuild the normalisation used during training.
        train_df = pd.read_csv(self._path(f"train_{self.model_name}.csv"))
        x_scaler, y_scaler = self._fit_scalers(train_df)

        df_test = data
        x_test = x_scaler.transform(df_test[self.para_fields].values)

        # NOTE(original author): loading reportedly fails with an encoding
        # error when the path contains Chinese characters.
        model = load_model(self._path(f"{self.model_name}.h5"))

        y_pred = y_scaler.inverse_transform(model.predict(x_test))

        # Predictions come back in input row order with a fresh 0..n-1 index;
        # reset the input index as well so rows pair up by position even when
        # the caller passes a sliced or filtered frame.
        df_out = pd.DataFrame(y_pred, columns=self.outcome_fields)
        df_out = pd.merge(df_test.reset_index(drop=True), df_out, how='outer',
                          left_index=True, right_index=True)
        df_out.to_excel(self._path(f"prediction_{self.model_name}.xlsx"),
                        index=False)
        return df_out.T.to_dict().values()


if __name__ == '__main__':
    from 数据库操作 import influxdb_query

    data1 = influxdb_query('SELECT para505 FROM "device_YL_315103022220A6D3_1" limit 300')
    data = influxdb_query('SELECT para168 FROM "device_LD_716001012220A5E8_1" limit 300')

    # First 100 paired samples form the training set; the rest are forecast inputs.
    df = pd.DataFrame([
        {'time': k['time'], 'para505': k['para505'], 'para168': v['para168']}
        for k, v in zip(data1[:100], data[:100])
    ])
    df1 = pd.DataFrame([
        {'time': k['time'], 'para505': k['para505']}
        for k, _ in zip(data1[100:], data[100:])
    ])

    os.chdir('D://')
    n = Neural_networks(['para505'], ['para168'], model_dir='.',
                        model_name='f39e6103681244a5a092ef9e2759b61c')
    # Training has to be run once before forecasting:
    #     print(n.training(df, epochs=5000))
    print(n.forecast(df1))