MUTC Photodetector Dataset
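The script below trains a small residual feed-forward network in PyTorch to predict each of the first five columns of the MUTC photodetector dataset (device performance metrics, with the wide-range output columns log-transformed) from the remaining 35 design parameters, and benchmarks it against linear-regression, k-nearest-neighbors, and random-forest baselines from scikit-learn.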

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# Normalization helpers: z-score using statistics from the training split
def normx(x, train_statsX):
    return (x - train_statsX['mean']) / train_statsX['std']

def norm(y, train_statsY):
    return (y - train_statsY['mean']) / train_statsY['std']

def denorm(y, train_statsY):
    return y * train_statsY['std'] + train_statsY['mean']

# Mean and max absolute percentage error (in %), rounded to two decimals
def mean_aep(u1, u2):
    return round(100 * (100 * sum(abs((u2 - u1) / u1)) / len(u1))) / 100

def max_aep(u1, u2):
    return round(100 * (100 * max(abs((u2 - u1) / u1)))) / 100
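# Worked example (hypothetical values): a single prediction of 103.0 against a
# ground truth of 100.0 gives mean_aep(np.array([100.0]), np.array([103.0])) == 3.0,
# i.e. a 3% mean absolute percentage error.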
# Load the 1750-design dataset; columns 22:40 are log10-transformed outputs
df = pd.read_csv("./MUTC1750designs.csv")
df[df.columns[22:40]] = np.log10(df[df.columns[22:40]])
print(df.shape)
print(df.head(3))
print(df.describe())
# Coefficient of variation (std/mean) of the five target columns
print(df[df.columns[0:5]].std() / df[df.columns[0:5]].mean())
# ANN hyperparameters
ac = 'relu'      # activation function (the model below hardcodes torch.relu)
nnno = 48        # number of neurons per hidden layer
dr_rate = 0.2    # dropout rate
EPOCHS = 400     # number of epochs
LR = 0.001       # learning rate
# Train one model per target: the first five columns are the outputs,
# columns 5:40 are the 35 design parameters used as inputs
for var_index in np.arange(5):
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        df.iloc[0:-1, 5:40], df.iloc[0:-1, var_index],
        test_size=0.2, random_state=55)
    train_statsY = Y_Train.describe().transpose()
    train_statsX = X_Train.describe().transpose()
    XX = normx(X_Train, train_statsX)
    YY = norm(Y_Train, train_statsY)
    xx = normx(X_Test, train_statsX)
    yy = norm(Y_Test, train_statsY)
    # Convert to PyTorch tensors
    X_train_tensor = torch.tensor(XX.values, dtype=torch.float32)
    Y_train_tensor = torch.tensor(YY.values, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(xx.values, dtype=torch.float32)
    Y_test_tensor = torch.tensor(yy.values, dtype=torch.float32).view(-1, 1)
    train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    # Feed-forward network with two skip connections: the raw input is
    # concatenated back in before fc3 and fc5
    class ANNModel(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
            super(ANNModel, self).__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.dropout1 = nn.Dropout(dropout_rate)
            self.fc2 = nn.Linear(hidden_dim, hidden_dim)
            self.dropout2 = nn.Dropout(dropout_rate)
            self.fc3 = nn.Linear(hidden_dim + input_dim, hidden_dim)
            self.dropout3 = nn.Dropout(dropout_rate)
            self.fc4 = nn.Linear(hidden_dim, hidden_dim)
            self.dropout4 = nn.Dropout(dropout_rate)
            self.fc5 = nn.Linear(hidden_dim + input_dim, hidden_dim)
            self.dropout5 = nn.Dropout(dropout_rate)
            self.fc6 = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            x1 = torch.relu(self.fc1(x))
            x1 = self.dropout1(x1)
            x1 = torch.relu(self.fc2(x1))
            x1 = self.dropout2(x1)
            x1 = torch.cat((x1, x), dim=1)  # concatenate with the raw input
            x1 = torch.relu(self.fc3(x1))
            x1 = self.dropout3(x1)
            x1 = torch.relu(self.fc4(x1))
            x1 = self.dropout4(x1)
            x1 = torch.cat((x1, x), dim=1)
            x1 = torch.relu(self.fc5(x1))
            x1 = self.dropout5(x1)
            output = self.fc6(x1)
            return output
    # The multi-stage feature extraction and concatenation operations increase
    # the model's expressive capacity
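    # Illustrative sanity check (added; not part of the original script): with
    # the 35 design inputs, fc3 and fc5 each receive hidden_dim + input_dim
    # features because of the concatenations above.
    _probe = ANNModel(35, nnno, 1, 0.0)
    assert _probe(torch.zeros(2, 35)).shape == (2, 1)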
    input_dim = X_Train.shape[1]
    hidden_dim = nnno
    output_dim = 1
    dropout_rate = dr_rate
    model = ANNModel(input_dim, hidden_dim, output_dim, dropout_rate)
    criterion = nn.MSELoss()
    optimizer = optim.Adamax(model.parameters(), lr=LR)
    # Training loop
    model.train()
    history = {'loss': [], 'val_loss': []}
    for epoch in range(EPOCHS):
        model.train()  # re-enable dropout; the per-epoch evaluation below switches to eval mode
        epoch_loss = 0.0
        for X_batch, Y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, Y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Record the mean training loss per epoch so it aligns with val_loss
        history['loss'].append(epoch_loss / len(train_loader))
        # Per-epoch evaluation on the held-out test set
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_test_tensor), Y_test_tensor).item()
        history['val_loss'].append(val_loss)
    # Final evaluation: predictions and targets back on the original scale
    model.eval()
    with torch.no_grad():
        test_predictions = model(X_test_tensor).numpy()
    u1 = denorm(yy, train_statsY).to_numpy()
    u2 = denorm(pd.Series(np.squeeze(test_predictions)), train_statsY).to_numpy()
    # Plot losses
    plt.figure(var_index + 10)
    plt.plot(history['loss'])
    plt.plot(history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()
    # Plot truth vs. prediction
    x1 = min(min(u1), min(u2))
    x2 = max(max(u1), max(u2))
    plt.figure(var_index)
    plt.plot([x1, x2], [x1, x2], color='red')
    plt.scatter(u1, u2)
    plt.xlabel('Ground Truth')
    plt.ylabel('Prediction')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.grid(color='grey', linestyle='--', linewidth=1)
    plt.show()
    # Errors
    error_ANN, error_ANN_max = mean_aep(u1, u2), max_aep(u1, u2)
    # Save ANN results for the first target (phase noise)
    if var_index == 0:
        np.savetxt("MUTC_training_loss.csv", history['loss'], delimiter=",")
        np.savetxt("MUTC_testing_loss.csv", history['val_loss'], delimiter=",")
        np.savetxt("MUTC_phasenoise_truth.csv", u1, delimiter=",")
        np.savetxt("MUTC_phasenoise_predictions.csv", u2, delimiter=",")
    # Linear Regression
    modelLR = LinearRegression()
    modelLR.fit(XX, YY)
    yhat = modelLR.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_LR, error_LR_max = mean_aep(u1, u2), max_aep(u1, u2)
    # k-Nearest Neighbors
    modelkNN = KNeighborsRegressor()
    modelkNN.fit(XX, YY)
    yhat = modelkNN.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_kNN, error_kNN_max = mean_aep(u1, u2), max_aep(u1, u2)
    # Random Forest
    modelRF = RandomForestRegressor()
    modelRF.fit(XX, YY)
    yhat = modelRF.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)), train_statsY)
    error_RF, error_RF_max = mean_aep(u1, u2), max_aep(u1, u2)
    # Print Errors
    print('************', var_index, '************')
    print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR, error_kNN, error_RF, error_ANN)
    print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)