You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
255 lines
8.7 KiB
255 lines
8.7 KiB
{
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0,
|
|
"metadata": {
|
|
"colab": {
|
|
"name": "MUTC1750_Predict_Metrics.ipynb",
|
|
"private_outputs": true,
|
|
"provenance": [],
|
|
"collapsed_sections": []
|
|
},
|
|
"kernelspec": {
|
|
"name": "python3",
|
|
"display_name": "Python 3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "H6eOwX3UGpvG"
|
|
},
|
|
"source": [
|
|
"import pathlib\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import seaborn as sns\n",
|
|
"# ML models\n",
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"from sklearn.neighbors import KNeighborsRegressor\n",
|
|
"from sklearn.ensemble import RandomForestRegressor\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"# Multilayer Perceptron\n",
|
|
"from keras.models import Model\n",
|
|
"from keras.layers import Input\n",
|
|
"from keras.layers import Dense\n",
|
|
"from keras.layers import Dropout\n",
|
|
"from keras.layers.merge import concatenate\n",
|
|
"from tensorflow.keras import optimizers\n",
|
|
"from tensorflow.keras import backend\n",
|
|
"from keras.layers.advanced_activations import LeakyReLU, PReLU\n"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "xO6mZcSAGyp7"
|
|
},
|
|
"source": [
|
|
"!git clone https://github.com/simsekergun/photodetectors.git\n",
|
|
"df = pd.read_csv(\"./photodetectors/MUTC1750designs.csv\")"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "1qLZ_FkrHJJB"
|
|
},
|
|
"source": [
|
|
"# Take log10 of the doping levels so that we deal with numbers in similar ranges\n",
|
|
"df[df.columns[22:40]] =np.log10(df[df.columns[22:40]])\n",
|
|
"df.shape"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "x5h23R0y1MXk"
|
|
},
|
|
"source": [
|
|
"df.head(3)"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "iyFCz4P3xSgt"
|
|
},
|
|
"source": [
|
|
"df.describe()"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "qBs0SZEek_Sh"
|
|
},
|
|
"source": [
|
|
"## Coefficient of Variation (std / mean)\n",
|
|
"df[df.columns[0:5]].std()/df[df.columns[0:5]].mean()"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "lnIsvKeLIwZY"
|
|
},
|
|
"source": [
|
|
"# Let us define some functions to normalize, de-normalize, and calculate errors\n",
|
|
"def normx(x):\n",
|
|
" return (x - train_statsX['mean']) / train_statsX['std']\n",
|
|
"def norm(y):\n",
|
|
" return (y - train_statsY['mean']) / train_statsY['std']\n",
|
|
"def denorm(y):\n",
|
|
" return (y* train_statsY['std'] + train_statsY['mean']) \n",
|
|
"def mean_aep(u1,u2): \n",
|
|
" return (round(100*(100*sum(abs((u2-u1)/u1))/len(u1)))/100)\n",
|
|
"def max_aep(u1,u2): \n",
|
|
" return (round(100*(100*max(abs((u2-u1)/u1))))/100) "
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "nAXW1rSrkJon"
|
|
},
|
|
"source": [
|
|
"# ANN parameters\n",
|
|
"ac = 'relu' # activation function\n",
|
|
"nnno = 48 # number of neurons\n",
|
|
"dr_rate = 0.2 # dropout rate\n",
|
|
"EPOCHS = 400 # number of epochs\n",
|
|
"LR = 0.001 # learning rate"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "T9UuySubf9K9"
|
|
},
|
|
"source": [
|
|
"for var_index in np.arange(5):\n",
|
|
" X_Train, X_Test, Y_Train, Y_Test = train_test_split(df.iloc[0:-1,5:40],df.iloc[0:-1,var_index], test_size=0.2, random_state=55)\n",
|
|
"\n",
|
|
" train_statsY = Y_Train.describe().transpose()\n",
|
|
" train_statsX = X_Train.describe().transpose()\n",
|
|
" XX = normx(X_Train)\n",
|
|
" YY = norm(Y_Train)\n",
|
|
" xx = normx(X_Test)\n",
|
|
" yy = norm(Y_Test)\n",
|
|
" #\n",
|
|
" visible = Input(shape=(len(X_Train.keys()),))\n",
|
|
" hidden1 = Dense(nnno, activation=ac)(visible)\n",
|
|
" hidden1 = Dropout(dr_rate)(hidden1)\n",
|
|
" hidden2 = Dense(nnno, activation=ac)(hidden1)\n",
|
|
" hidden2 = Dropout(dr_rate)(hidden2)\n",
|
|
" mergeA = concatenate([hidden2, visible])\n",
|
|
" hiddenB = Dense(nnno, activation=ac)(mergeA)\n",
|
|
" hiddenB = Dropout(dr_rate)(hiddenB)\n",
|
|
" hidden3 = Dense(nnno, activation=ac)(hiddenB)\n",
|
|
" hidden3 = Dropout(dr_rate)(hidden3)\n",
|
|
" merge = concatenate([hidden3, visible])\n",
|
|
" hidden4 = Dense(nnno, activation=ac)(merge)\n",
|
|
" hidden4 = Dropout(dr_rate)(hidden4)\n",
|
|
" predicted_value = Dense(1)(hidden4)\n",
|
|
" modelANN = Model(inputs=visible, outputs=predicted_value)\n",
|
|
" #\n",
|
|
" opt = optimizers.Adamax(learning_rate=LR)\n",
|
|
" modelANN.compile(optimizer=opt, loss=['mse'])\n",
|
|
" history = modelANN.fit(XX, YY,epochs=EPOCHS, validation_data = (xx,yy), verbose=0)\n",
|
|
" # plot losses\n",
|
|
" plt.figure(var_index+10)\n",
|
|
" plt.plot(history.history['loss'])\n",
|
|
" plt.plot(history.history['val_loss'])\n",
|
|
" plt.ylabel('loss')\n",
|
|
" plt.xlabel('epoch')\n",
|
|
" plt.legend(['train', 'test'], loc='upper right')\n",
|
|
" plt.show()\n",
|
|
" #\n",
|
|
" test_predictions = modelANN.predict(xx)\n",
|
|
" u1 = denorm(yy).to_numpy()\n",
|
|
" u2 = denorm(pd.Series(np.squeeze(test_predictions)))\n",
|
|
" # plot truth vs. prediction\n",
|
|
" x1 = min(min(u1),min(u2))\n",
|
|
" x2 = max(max(u1),max(u2))\n",
|
|
" plt.figure(var_index)\n",
|
|
" plt.plot([x1,x2],[x1,x2],color='red')\n",
|
|
" plt.scatter(u1, u2)\n",
|
|
" plt.xlabel('Ground Truth')\n",
|
|
" plt.ylabel('Prediction')\n",
|
|
" plt.gca().set_aspect('equal', adjustable='box')\n",
|
|
" plt.grid(color='grey', linestyle='--', linewidth=1)\n",
|
|
" # Errors\n",
|
|
" error_ANN, error_ANN_max = mean_aep(u1,u2), max_aep(u1,u2) \n",
|
|
" # Save ANN Results\n",
|
|
" if var_index == 0:\n",
|
|
" np.savetxt(\"MUTC_training_loss.csv\", history.history['loss'], delimiter=\",\")\n",
|
|
" np.savetxt(\"MUTC_testing_loss.csv\", history.history['val_loss'], delimiter=\",\")\n",
|
|
" np.savetxt(\"MUTC_phasenoise_truth.csv\", u1, delimiter=\",\")\n",
|
|
" np.savetxt(\"MUTC_phasenoise_predictions.csv\", u2, delimiter=\",\")\n",
|
|
" ## LINEAR REGRESSION\n",
|
|
" modelLR = LinearRegression()\n",
|
|
" modelLR.fit(XX, YY)\n",
|
|
" yhat = modelLR.predict(xx)\n",
|
|
" u2 = denorm(pd.Series(np.squeeze(yhat)))\n",
|
|
" # calculate errors\n",
|
|
" error_LR, error_LR_max = mean_aep(u1,u2), max_aep(u1,u2)\n",
|
|
" ## k-Nearest Neighbors\n",
|
|
" modelkNN = KNeighborsRegressor()\n",
|
|
" modelkNN.fit(XX, YY)\n",
|
|
" yhat = modelkNN.predict(xx)\n",
|
|
" u2 = denorm(pd.Series(np.squeeze(yhat)))\n",
|
|
" # calculate errors\n",
|
|
" error_kNN, error_kNN_max = mean_aep(u1,u2), max_aep(u1,u2)\n",
|
|
" ## RANDOM FOREST \n",
|
|
" modelRF = RandomForestRegressor()\n",
|
|
" modelRF.fit(XX, YY)\n",
|
|
" yhat = modelRF.predict(xx)\n",
|
|
" u2 = denorm(pd.Series(np.squeeze(yhat)))\n",
|
|
" # calculate errors\n",
|
|
" error_RF, error_RF_max = mean_aep(u1,u2), max_aep(u1,u2)\n",
|
|
" # PRINT ERRORS\n",
|
|
" print('************',var_index,'************')\n",
|
|
" print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')\n",
|
|
" print(error_LR, error_kNN, error_RF, error_ANN)\n",
|
|
" print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')\n",
|
|
" print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)\n",
|
|
" backend.clear_session()"
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"metadata": {
|
|
"id": "FFtnWBnMmbAu"
|
|
},
|
|
"source": [
|
|
""
|
|
],
|
|
"execution_count": null,
|
|
"outputs": []
|
|
}
|
|
]
|
|
}
|