# ...existing code...
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# ...existing code...


def normx(x, train_statsX):
    # Standardize features with the training-set mean and std
    return (x - train_statsX['mean']) / train_statsX['std']


def norm(y, train_statsY):
    # Standardize the target with the training-set mean and std
    return (y - train_statsY['mean']) / train_statsY['std']


def denorm(y, train_statsY):
    # Invert the target standardization
    return y * train_statsY['std'] + train_statsY['mean']


def mean_aep(u1, u2):
    # Mean absolute error in percent, rounded to 2 decimals
    return round(100 * np.mean(np.abs((u2 - u1) / u1)), 2)


def max_aep(u1, u2):
    # Maximum absolute error in percent, rounded to 2 decimals
    return round(100 * np.max(np.abs((u2 - u1) / u1)), 2)


df = pd.read_csv("./MUTC1750designs.csv")
df[df.columns[22:40]] = np.log10(df[df.columns[22:40]])  # log-scale the wide-range columns
print(df.shape)
print(df.head(3))
print(df.describe())
print(df[df.columns[0:5]].std() / df[df.columns[0:5]].mean())  # coefficient of variation of the targets

# ANN parameters
ac = 'relu'      # activation function (hard-coded as torch.relu in the model)
nnno = 48        # number of neurons per hidden layer
dr_rate = 0.2    # dropout rate
EPOCHS = 400     # number of epochs
LR = 0.001       # learning rate


class ANNModel(nn.Module):
    """Fully connected regressor with two input-concatenation (skip) stages."""

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
        super(ANNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc3 = nn.Linear(hidden_dim + input_dim, hidden_dim)  # takes hidden features + raw input
        self.dropout3 = nn.Dropout(dropout_rate)
        self.fc4 = nn.Linear(hidden_dim, hidden_dim)
        self.dropout4 = nn.Dropout(dropout_rate)
        self.fc5 = nn.Linear(hidden_dim + input_dim, hidden_dim)  # second skip stage
        self.dropout5 = nn.Dropout(dropout_rate)
        self.fc6 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x1 = torch.relu(self.fc1(x))
        x1 = self.dropout1(x1)
        x1 = torch.relu(self.fc2(x1))
        x1 = self.dropout2(x1)
        x1 = torch.cat((x1, x), dim=1)  # concatenate hidden features with the raw input
        x1 = torch.relu(self.fc3(x1))
        x1 = self.dropout3(x1)
        x1 = torch.relu(self.fc4(x1))
        x1 = self.dropout4(x1)
        x1 = torch.cat((x1, x), dim=1)  # second concatenation
        x1 = torch.relu(self.fc5(x1))
        x1 = self.dropout5(x1)
        return self.fc6(x1)
# Multi-stage feature extraction plus input concatenation increases the model's expressive capacity
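# Assumption (not in the original script): pin the global seeds so that the
# dropout masks and DataLoader shuffling are reproducible across runs; the
# train/test split below is already fixed by random_state=55. The seed value
# 0 is arbitrary.
torch.manual_seed(0)
np.random.seed(0)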
for var_index in np.arange(5):
    # Features are columns 5:40, targets are columns 0:5; the last row is dropped
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        df.iloc[0:-1, 5:40], df.iloc[0:-1, var_index], test_size=0.2, random_state=55)
    train_statsY = Y_Train.describe().transpose()
    train_statsX = X_Train.describe().transpose()
    XX = normx(X_Train, train_statsX)
    YY = norm(Y_Train, train_statsY)
    xx = normx(X_Test, train_statsX)
    yy = norm(Y_Test, train_statsY)

    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(XX.values, dtype=torch.float32)
    Y_train_tensor = torch.tensor(YY.values, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(xx.values, dtype=torch.float32)
    Y_test_tensor = torch.tensor(yy.values, dtype=torch.float32).view(-1, 1)
    train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    input_dim = X_Train.shape[1]
    hidden_dim = nnno
    output_dim = 1
    dropout_rate = dr_rate
    model = ANNModel(input_dim, hidden_dim, output_dim, dropout_rate)
    criterion = nn.MSELoss()
    optimizer = optim.Adamax(model.parameters(), lr=LR)

    # Training loop: record one training loss and one validation loss per
    # epoch so the two curves share an x-axis when plotted
    history = {'loss': [], 'val_loss': []}
    for epoch in range(EPOCHS):
        model.train()  # re-enable dropout after the evaluation pass below
        epoch_loss = 0.0
        for X_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, Y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item() * X_batch.size(0)
        history['loss'].append(epoch_loss / len(train_dataset))

        # Validation loss on the held-out set (dropout disabled)
        model.eval()
        with torch.no_grad():
            history['val_loss'].append(criterion(model(X_test_tensor), Y_test_tensor).item())

    # Final evaluation
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test_tensor).numpy()
    u1 = denorm(yy, train_statsY).to_numpy()
    u2 = denorm(pd.Series(np.squeeze(test_predictions)), train_statsY).to_numpy()

    # Plot losses
    plt.figure(var_index + 10)
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()

    # Plot truth vs. prediction
    x1 = min(min(u1), min(u2))
    x2 = max(max(u1), max(u2))
    plt.figure(var_index)
    plt.plot([x1, x2], [x1, x2], color='red')  # identity line
    plt.scatter(u1, u2)
    plt.xlabel('Ground Truth')
    plt.ylabel('Prediction')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.grid(color='grey', linestyle='--', linewidth=1)
    plt.show()

    # Errors
    error_ANN, error_ANN_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Save ANN results for the first target (phase noise)
    if var_index == 0:
        np.savetxt("MUTC_training_loss.csv", history['loss'], delimiter=",")
        np.savetxt("MUTC_testing_loss.csv", history['val_loss'], delimiter=",")
        np.savetxt("MUTC_phasenoise_truth.csv", u1, delimiter=",")
        np.savetxt("MUTC_phasenoise_predictions.csv", u2, delimiter=",")

    # Linear Regression baseline
    modelLR = LinearRegression()
    modelLR.fit(XX, YY)
    yhat = modelLR.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY).to_numpy()
    error_LR, error_LR_max = mean_aep(u1, u2), max_aep(u1, u2)

    # k-Nearest Neighbors baseline
    modelkNN = KNeighborsRegressor()
    modelkNN.fit(XX, YY)
    yhat = modelkNN.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY).to_numpy()
    error_kNN, error_kNN_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Random Forest baseline
    modelRF = RandomForestRegressor()
    modelRF.fit(XX, YY)
    yhat = modelRF.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY).to_numpy()
    error_RF, error_RF_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Print errors
    print('************', var_index, '************')
    print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR, error_kNN, error_RF, error_ANN)
    print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)
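# Assumption (not in the original script): persist the most recently trained
# ANN's weights so it can be reloaded for inference without retraining.
# torch.save / load_state_dict are the standard PyTorch idioms for this; the
# file name is illustrative only. To keep all five per-target models, this
# line would instead go at the end of the loop body above with var_index in
# the file name.
torch.save(model.state_dict(), "MUTC_ANN_last_target.pt")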