# ...existing code...
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# ...existing code...

def normx(x, train_statsX):
    # Normalize features using training-set mean and std
    return (x - train_statsX['mean']) / train_statsX['std']

def norm(y, train_statsY):
    # Normalize the target using training-set mean and std
    return (y - train_statsY['mean']) / train_statsY['std']

def denorm(y, train_statsY):
    # Map a normalized target back to its original scale
    return y * train_statsY['std'] + train_statsY['mean']

def mean_aep(u1, u2):
    # Mean absolute error of u2 relative to u1, in percent, rounded to two decimals
    return round(100 * (100 * sum(abs((u2 - u1) / u1)) / len(u1))) / 100

def max_aep(u1, u2):
    # Maximum absolute error of u2 relative to u1, in percent, rounded to two decimals
    return round(100 * (100 * max(abs((u2 - u1) / u1)))) / 100
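
# Illustrative check of the error metrics above (example values only, not from the dataset):
# u1 = np.array([100.0, 200.0]); u2 = np.array([101.0, 198.0])
# mean_aep(u1, u2)  # -> 1.0  (mean absolute error, in percent)
# max_aep(u1, u2)   # -> 1.0  (largest absolute error, in percent)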

df = pd.read_csv("./MUTC1750designs.csv")
df[df.columns[22:40]] = np.log10(df[df.columns[22:40]])
print(df.shape)
print(df.head(3))
print(df.describe())
print(df[df.columns[0:5]].std() / df[df.columns[0:5]].mean())

# ANN parameters
ac = 'relu'       # activation function
nnno = 48         # number of neurons per hidden layer
dr_rate = 0.2     # dropout rate
EPOCHS = 400      # number of epochs
LR = 0.001        # learning rate


for var_index in np.arange(5):
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        df.iloc[0:-1, 5:40], df.iloc[0:-1, var_index], test_size=0.2, random_state=55)

    train_statsY = Y_Train.describe().transpose()
    train_statsX = X_Train.describe().transpose()
    XX = normx(X_Train, train_statsX)
    YY = norm(Y_Train, train_statsY)
    xx = normx(X_Test, train_statsX)
    yy = norm(Y_Test, train_statsY)

    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(XX.values, dtype=torch.float32)
    Y_train_tensor = torch.tensor(YY.values, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(xx.values, dtype=torch.float32)
    Y_test_tensor = torch.tensor(yy.values, dtype=torch.float32).view(-1, 1)

    train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

    class ANNModel(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
            super(ANNModel, self).__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.dropout1 = nn.Dropout(dropout_rate)
            self.fc2 = nn.Linear(hidden_dim, hidden_dim)
            self.dropout2 = nn.Dropout(dropout_rate)
            self.fc3 = nn.Linear(hidden_dim + input_dim, hidden_dim)
            self.dropout3 = nn.Dropout(dropout_rate)
            self.fc4 = nn.Linear(hidden_dim, hidden_dim)
            self.dropout4 = nn.Dropout(dropout_rate)
            self.fc5 = nn.Linear(hidden_dim + input_dim, hidden_dim)
            self.dropout5 = nn.Dropout(dropout_rate)
            self.fc6 = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            x1 = torch.relu(self.fc1(x))
            x1 = self.dropout1(x1)
            x1 = torch.relu(self.fc2(x1))
            x1 = self.dropout2(x1)
            x1 = torch.cat((x1, x), dim=1)  # concatenate with the raw input (skip connection)
            x1 = torch.relu(self.fc3(x1))
            x1 = self.dropout3(x1)
            x1 = torch.relu(self.fc4(x1))
            x1 = self.dropout4(x1)
            x1 = torch.cat((x1, x), dim=1)
            x1 = torch.relu(self.fc5(x1))
            x1 = self.dropout5(x1)
            output = self.fc6(x1)
            return output
    # Multi-level feature extraction plus input concatenation (skip connections) increases the model's expressive power

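    # Shape sanity check (illustrative only; 35 input features correspond to df columns 5:40):
    # ANNModel(35, nnno, 1, dr_rate)(torch.zeros(4, 35)) would return a tensor of shape (4, 1).
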
    input_dim = X_Train.shape[1]
    hidden_dim = nnno
    output_dim = 1
    dropout_rate = dr_rate

    model = ANNModel(input_dim, hidden_dim, output_dim, dropout_rate)
    criterion = nn.MSELoss()
    optimizer = optim.Adamax(model.parameters(), lr=LR)

    # Training loop
    model.train()
    history = {'loss': [], 'val_loss': []}
    for epoch in range(EPOCHS):
        model.train()  # switch back to training mode; the per-epoch evaluation below leaves the model in eval mode
        for X_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, Y_batch)
            loss.backward()
            history['loss'].append(loss.item())  # recorded per batch
            optimizer.step()

        # Per-epoch evaluation on the test set
        model.eval()
        with torch.no_grad():
            test_predictions = model(X_test_tensor).numpy()
            history['val_loss'].append(criterion(torch.tensor(test_predictions), Y_test_tensor).item())

    # Final evaluation
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test_tensor).numpy()
    u1 = denorm(yy, train_statsY).to_numpy()
    u2 = denorm(pd.Series(np.squeeze(test_predictions)), train_statsY).to_numpy()

    # Plot losses
    plt.figure(var_index + 10)
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()

    # Plot truth vs. prediction
    x1 = min(min(u1), min(u2))
    x2 = max(max(u1), max(u2))
    plt.figure(var_index)
    plt.plot([x1, x2], [x1, x2], color='red')
    plt.scatter(u1, u2)
    plt.xlabel('Ground Truth')
    plt.ylabel('Prediction')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.grid(color='grey', linestyle='--', linewidth=1)
    plt.show()

    # Errors
    error_ANN, error_ANN_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Save ANN Results
    if var_index == 0:
        np.savetxt("MUTC_training_loss.csv", history['loss'], delimiter=",")
        np.savetxt("MUTC_testing_loss.csv", history['val_loss'], delimiter=",")
        np.savetxt("MUTC_phasenoise_truth.csv", u1, delimiter=",")
        np.savetxt("MUTC_phasenoise_predictions.csv", u2, delimiter=",")

    # Linear Regression
    modelLR = LinearRegression()
    modelLR.fit(XX, YY)
    yhat = modelLR.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_LR, error_LR_max = mean_aep(u1, u2), max_aep(u1, u2)

    # k-Nearest Neighbors
    modelkNN = KNeighborsRegressor()
    modelkNN.fit(XX, YY)
    yhat = modelkNN.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_kNN, error_kNN_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Random Forest
    modelRF = RandomForestRegressor()
    modelRF.fit(XX, YY)
    yhat = modelRF.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_RF, error_RF_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Print Errors
    print('************', var_index, '************')
    print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR, error_kNN, error_RF, error_ANN)
    print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)