In [None]:
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# ML models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# Multilayer Perceptron
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers.merge import concatenate
from tensorflow.keras import optimizers
from tensorflow.keras import backend
from keras.layers.advanced_activations import LeakyReLU, PReLU


In [None]:
!git clone https://github.com/simsekergun/photodetectors.git
df = pd.read_csv("./photodetectors/MUTC1750designs.csv")

In [None]:
# Take log10 of the doping levels (columns 22..39) so that all features live
# in similar numeric ranges.
doping_cols = df.columns[22:40]
# The transform overwrites df in place, so applying it twice (e.g. by
# re-running this cell) would take the log of a log and silently corrupt the
# features. The flag stored on the frame makes the cell safe to re-run; a
# freshly loaded df carries no flag and is transformed again as expected.
if not df.attrs.get("doping_log10_applied", False):
    df[doping_cols] = np.log10(df[doping_cols])
    df.attrs["doping_log10_applied"] = True
df.shape

In [None]:
# Preview the first three rows of the table
df.head(3)

In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max)
df.describe()

In [None]:
## Coefficient of variation (std / mean) of the first five columns,
## i.e. the columns that are later used, one at a time, as regression targets
df[df.columns[0:5]].std()/df[df.columns[0:5]].mean()

In [None]:
# Let us define some functions to normalize, de-normalize, and calculate errors
def normx(x, stats=None):
    """Standardize input features: ``(x - mean) / std``.

    Parameters
    ----------
    x : scalar, Series or DataFrame
        Feature values to standardize. For a DataFrame the statistics are
        aligned to its columns by label.
    stats : mapping or DataFrame with 'mean' and 'std' entries, optional
        Statistics to standardize with. When omitted, the module-level
        ``train_statsX`` is used; it must have been assigned before the call.

    Returns
    -------
    Same kind of object as ``x``. A feature whose 'std' is zero produces
    inf/NaN because of the division.
    """
    if stats is None:
        # Fall back to the statistics of the current training split.
        stats = train_statsX
    return (x - stats['mean']) / stats['std']
def norm(y, stats=None):
    """Standardize target values: ``(y - mean) / std``.

    Parameters
    ----------
    y : scalar, array or Series
        Target values to standardize.
    stats : mapping with 'mean' and 'std' entries, optional
        Statistics to standardize with. When omitted, the module-level
        ``train_statsY`` is used; it must have been assigned before the call.

    Returns
    -------
    Standardized values, same kind of object as ``y``.
    """
    if stats is None:
        # Fall back to the statistics of the current training split.
        stats = train_statsY
    return (y - stats['mean']) / stats['std']
def denorm(y, stats=None):
    """Invert :func:`norm`: map standardized targets back via ``y * std + mean``.

    Parameters
    ----------
    y : scalar, array or Series
        Standardized target values (e.g. model predictions).
    stats : mapping with 'mean' and 'std' entries, optional
        Statistics that were used for standardization. When omitted, the
        module-level ``train_statsY`` is used; it must have been assigned
        before the call.

    Returns
    -------
    Values in the original units, same kind of object as ``y``.
    """
    if stats is None:
        # Fall back to the statistics of the current training split.
        stats = train_statsY
    return y * stats['std'] + stats['mean']
def mean_aep(u1, u2):
    """Mean absolute percentage error of ``u2`` with respect to ``u1``.

    Parameters
    ----------
    u1 : array-like
        Reference (ground-truth) values. Entries equal to zero make the
        relative error infinite and the final rounding then raises.
    u2 : array-like
        Predicted values, same length as ``u1``.

    Returns
    -------
    float
        ``100 * mean(|u2 - u1| / |u1|)`` rounded to two decimals.

    Raises
    ------
    ValueError
        If the inputs are empty.
    """
    # Convert to plain arrays so the comparison is positional: two pandas
    # Series with different indexes would otherwise be aligned by label and
    # yield NaN. This also lets plain lists be passed.
    truth = np.asarray(u1, dtype=float)
    pred = np.asarray(u2, dtype=float)
    if truth.size == 0:
        raise ValueError("mean_aep requires at least one value")
    rel_err = np.abs((pred - truth) / truth)
    percent = 100 * float(np.sum(rel_err)) / truth.size
    # round-to-integer of 100*x, then /100, keeps exactly two decimals
    return round(100 * percent) / 100
def max_aep(u1, u2):
    """Maximum absolute percentage error of ``u2`` with respect to ``u1``.

    Parameters
    ----------
    u1 : array-like
        Reference (ground-truth) values. Entries equal to zero make the
        relative error infinite and the final rounding then raises.
    u2 : array-like
        Predicted values, same length as ``u1``.

    Returns
    -------
    float
        ``100 * max(|u2 - u1| / |u1|)`` rounded to two decimals.

    Raises
    ------
    ValueError
        If the inputs are empty.
    """
    # Convert to plain arrays so the comparison is positional: two pandas
    # Series with different indexes would otherwise be aligned by label and
    # yield NaN. This also lets plain lists be passed.
    truth = np.asarray(u1, dtype=float)
    pred = np.asarray(u2, dtype=float)
    if truth.size == 0:
        raise ValueError("max_aep requires at least one value")
    worst = float(np.max(np.abs((pred - truth) / truth)))
    # round-to-integer of 100*x, then /100, keeps exactly two decimals
    return round(100 * (100 * worst)) / 100

In [None]:
# ANN hyper-parameters (read by the training loop further down)
ac = 'relu' # activation function of every hidden Dense layer
nnno = 48 # number of neurons in each hidden Dense layer
dr_rate = 0.2 # dropout rate applied after each hidden layer
EPOCHS = 400 # number of training epochs
LR = 0.001 # learning rate passed to the Adamax optimizer

In [None]:
def _build_ann(n_inputs):
    """Assemble the fully connected regression network trained for each target.

    Five hidden Dense layers (``nnno`` units, ``ac`` activation), each followed
    by ``Dropout(dr_rate)``. The raw inputs are concatenated back in after the
    second and after the fourth hidden layer. The output is a single Dense unit
    without activation.
    """
    visible = Input(shape=(n_inputs,))
    hidden1 = Dense(nnno, activation=ac)(visible)
    hidden1 = Dropout(dr_rate)(hidden1)
    hidden2 = Dense(nnno, activation=ac)(hidden1)
    hidden2 = Dropout(dr_rate)(hidden2)
    mergeA = concatenate([hidden2, visible])
    hiddenB = Dense(nnno, activation=ac)(mergeA)
    hiddenB = Dropout(dr_rate)(hiddenB)
    hidden3 = Dense(nnno, activation=ac)(hiddenB)
    hidden3 = Dropout(dr_rate)(hidden3)
    merge = concatenate([hidden3, visible])
    hidden4 = Dense(nnno, activation=ac)(merge)
    hidden4 = Dropout(dr_rate)(hidden4)
    predicted_value = Dense(1)(hidden4)
    return Model(inputs=visible, outputs=predicted_value)


def _baseline_errors(model, XX, YY, xx, u1):
    """Fit a scikit-learn regressor and return its (mean, max) percentage errors.

    The model is trained on the normalized training data, its predictions on
    the normalized test inputs are de-normalized with ``denorm`` and compared
    against the de-normalized ground truth ``u1``.
    """
    model.fit(XX, YY)
    yhat = model.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)))
    return mean_aep(u1, u2), max_aep(u1, u2)


# Train one ANN and three baseline regressors for each of the first five
# columns (the targets), using columns 5..39 as input features.
for var_index in range(5):
    # NOTE(review): iloc[0:-1] leaves out the last row of df — confirm that
    # this is intentional and not an off-by-one.
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        df.iloc[0:-1, 5:40], df.iloc[0:-1, var_index],
        test_size=0.2, random_state=55)
    target_name = Y_Train.name

    # normx/norm/denorm read these module-level statistics, so they must be
    # refreshed (from the training split only) before any of them is called.
    train_statsY = Y_Train.describe().transpose()
    train_statsX = X_Train.describe().transpose()
    XX = normx(X_Train)
    YY = norm(Y_Train)
    xx = normx(X_Test)
    yy = norm(Y_Test)

    ## ANN
    # NOTE: the network's weight initialisation and dropout are not seeded,
    # so ANN numbers vary slightly between runs.
    modelANN = _build_ann(X_Train.shape[1])
    opt = optimizers.Adamax(learning_rate=LR)
    modelANN.compile(optimizer=opt, loss=['mse'])
    history = modelANN.fit(XX, YY, epochs=EPOCHS, validation_data=(xx, yy), verbose=0)

    # plot losses (the held-out split doubles as validation data)
    fig_loss, ax_loss = plt.subplots()
    ax_loss.plot(history.history['loss'])
    ax_loss.plot(history.history['val_loss'])
    ax_loss.set_ylabel('loss')
    ax_loss.set_xlabel('epoch')
    ax_loss.set_title(f'ANN training history: {target_name}')
    ax_loss.legend(['train', 'test'], loc='upper right')
    plt.show()
    plt.close(fig_loss)

    test_predictions = modelANN.predict(xx)
    u1 = denorm(yy).to_numpy()
    u2 = denorm(pd.Series(np.squeeze(test_predictions)))

    # plot truth vs. prediction, with the identity line as reference
    x1 = min(np.min(u1), np.min(u2))
    x2 = max(np.max(u1), np.max(u2))
    fig_fit, ax_fit = plt.subplots()
    ax_fit.plot([x1, x2], [x1, x2], color='red')
    ax_fit.scatter(u1, u2)
    ax_fit.set_xlabel('Ground Truth')
    ax_fit.set_ylabel('Prediction')
    ax_fit.set_title(f'ANN prediction vs. truth: {target_name}')
    ax_fit.set_aspect('equal', adjustable='box')
    ax_fit.grid(color='grey', linestyle='--', linewidth=1)
    # Show the figure here so it appears next to this target's metrics
    # instead of piling up at the end of the cell; close it to free memory.
    plt.show()
    plt.close(fig_fit)

    # Errors
    error_ANN, error_ANN_max = mean_aep(u1, u2), max_aep(u1, u2)

    # Save ANN results for the first target only
    # (the file names suggest column 0 is the phase noise — TODO confirm)
    if var_index == 0:
        np.savetxt("MUTC_training_loss.csv", history.history['loss'], delimiter=",")
        np.savetxt("MUTC_testing_loss.csv", history.history['val_loss'], delimiter=",")
        np.savetxt("MUTC_phasenoise_truth.csv", u1, delimiter=",")
        np.savetxt("MUTC_phasenoise_predictions.csv", u2, delimiter=",")

    ## LINEAR REGRESSION
    modelLR = LinearRegression()
    error_LR, error_LR_max = _baseline_errors(modelLR, XX, YY, xx, u1)

    ## k-Nearest Neighbors
    modelkNN = KNeighborsRegressor()
    error_kNN, error_kNN_max = _baseline_errors(modelkNN, XX, YY, xx, u1)

    ## RANDOM FOREST
    # Seeded (same seed as the train/test split) so the baseline is reproducible.
    modelRF = RandomForestRegressor(random_state=55)
    error_RF, error_RF_max = _baseline_errors(modelRF, XX, YY, xx, u1)

    # PRINT ERRORS
    print('************',var_index,'************')
    print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR, error_kNN, error_RF, error_ANN)
    print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)

    # Drop the Keras graph/state before building the next model.
    backend.clear_session()