From 2a610476b745b6c9fb74fcc87912e7dd5fea9323 Mon Sep 17 00:00:00 2001 From: simsekergun <58180288+simsekergun@users.noreply.github.com> Date: Thu, 27 May 2021 16:14:09 -0400 Subject: [PATCH] Add files via upload --- MUTC1750_Predict_Metrics.ipynb | 255 +++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 MUTC1750_Predict_Metrics.ipynb diff --git a/MUTC1750_Predict_Metrics.ipynb b/MUTC1750_Predict_Metrics.ipynb new file mode 100644 index 0000000..6b94833 --- /dev/null +++ b/MUTC1750_Predict_Metrics.ipynb @@ -0,0 +1,255 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "MUTC1750_Predict_Metrics.ipynb", + "private_outputs": true, + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "id": "H6eOwX3UGpvG" + }, + "source": [ + "import pathlib\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "# ML models\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import train_test_split\n", + "# Multilayer Perceptron\n", + "from keras.models import Model\n", + "from keras.layers import Input\n", + "from keras.layers import Dense\n", + "from keras.layers import Dropout\n", + "from keras.layers.merge import concatenate\n", + "from tensorflow.keras import optimizers\n", + "from tensorflow.keras import backend\n", + "from keras.layers.advanced_activations import LeakyReLU, PReLU\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "xO6mZcSAGyp7" + }, + "source": [ + "!git clone https://github.com/simsekergun/photodetectors.git\n", + "df = pd.read_csv(\"./photodetectors/MUTC1750designs.csv\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "1qLZ_FkrHJJB" + }, + "source": [ + "# let's take log10 of doping levels so that we deal with numbers in the similar ranges\n", + "df[df.columns[22:40]] =np.log10(df[df.columns[22:40]])\n", + "df.shape" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "x5h23R0y1MXk" + }, + "source": [ + "df.head(3)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "iyFCz4P3xSgt" + }, + "source": [ + "df.describe()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "qBs0SZEek_Sh" + }, + "source": [ + "## Coefficient of Correlation\n", + "df[df.columns[0:5]].std()/df[df.columns[0:5]].mean()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "lnIsvKeLIwZY" + }, + "source": [ + "# Let us some functions to normalize, de-normalize, and to calculate errors\n", + "def normx(x):\n", + " return (x - train_statsX['mean']) / train_statsX['std']\n", + "def norm(y):\n", + " return (y - train_statsY['mean']) / train_statsY['std']\n", + "def denorm(y):\n", + " return (y* train_statsY['std'] + train_statsY['mean']) \n", + "def mean_aep(u1,u2): \n", + " return (round(100*(100*sum(abs((u2-u1)/u1))/len(u1)))/100)\n", + "def max_aep(u1,u2): \n", + " return (round(100*(100*max(abs((u2-u1)/u1))))/100) " + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "nAXW1rSrkJon" + }, + "source": [ + "# ANN parameters\n", + "ac = 'relu' # activation function\n", + "nnno = 48 # number of neurons\n", + "dr_rate = 0.2 # dropout rate\n", + "EPOCHS = 400 # number of epocs\n", + "LR = 0.001 # learning rate" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "T9UuySubf9K9" + }, + "source": [ + "for var_index in np.arange(5):\n", + " X_Train, X_Test, Y_Train, Y_Test = train_test_split(df.iloc[0:-1,5:40],df.iloc[0:-1,var_index], test_size=0.2, random_state=55)\n", + "\n", + " train_statsY = Y_Train.describe().transpose()\n", + " train_statsX = X_Train.describe().transpose()\n", + " XX = normx(X_Train)\n", + " YY = norm(Y_Train)\n", + " xx = normx(X_Test)\n", + " yy = norm(Y_Test)\n", + " #\n", + " visible = Input(shape=(len(X_Train.keys()),))\n", + " hidden1 = Dense(nnno, activation=ac)(visible)\n", + " hidden1 = Dropout(dr_rate)(hidden1)\n", + " hidden2 = Dense(nnno, activation=ac)(hidden1)\n", + " hidden2 = Dropout(dr_rate)(hidden2)\n", + " mergeA = concatenate([hidden2, visible])\n", + " hiddenB = Dense(nnno, activation=ac)(mergeA)\n", + " hiddenB = Dropout(dr_rate)(hiddenB)\n", + " hidden3 = Dense(nnno, activation=ac)(hiddenB)\n", + " hidden3 = Dropout(dr_rate)(hidden3)\n", + " merge = concatenate([hidden3, visible])\n", + " hidden4 = Dense(nnno, activation=ac)(merge)\n", + " hidden4 = Dropout(dr_rate)(hidden4)\n", + " predicted_value = Dense(1)(hidden4)\n", + " modelANN = Model(inputs=visible, outputs=predicted_value)\n", + " #\n", + " opt = optimizers.Adamax(learning_rate=LR)\n", + " modelANN.compile(optimizer=opt, loss=['mse'])\n", + " history = modelANN.fit(XX, YY,epochs=EPOCHS, validation_data = (xx,yy), verbose=0)\n", + " # plot losses\n", + " plt.figure(var_index+10)\n", + " plt.plot(history.history['loss'])\n", + " plt.plot(history.history['val_loss'])\n", + " plt.ylabel('loss')\n", + " plt.xlabel('epoch')\n", + " plt.legend(['train', 'test'], loc='upper right')\n", + " plt.show()\n", + " #\n", + " test_predictions = modelANN.predict(xx)\n", + " u1 = denorm(yy).to_numpy()\n", + " u2 = denorm(pd.Series(np.squeeze(test_predictions)))\n", + " # plot truth vs. prediction\n", + " x1 = min(min(u1),min(u2))\n", + " x2 = max(max(u1),max(u2))\n", + " plt.figure(var_index)\n", + " plt.plot([x1,x2],[x1,x2],color='red')\n", + " plt.scatter(u1, u2)\n", + " plt.xlabel('Ground Truth')\n", + " plt.ylabel('Prediction')\n", + " plt.gca().set_aspect('equal', adjustable='box')\n", + " plt.grid(color='grey', linestyle='--', linewidth=1)\n", + " # Errors\n", + " error_ANN, error_ANN_max = mean_aep(u1,u2), max_aep(u1,u2) \n", + " # Save ANN Results\n", + " if var_index == 0:\n", + " np.savetxt(\"MUTC_training_loss.csv\", history.history['loss'], delimiter=\",\")\n", + " np.savetxt(\"MUTC_testing_loss.csv\", history.history['val_loss'], delimiter=\",\")\n", + " np.savetxt(\"MUTC_phasenoise_truth.csv\", u1, delimiter=\",\")\n", + " np.savetxt(\"MUTC_phasenoise_predictions.csv\", u2, delimiter=\",\")\n", + " ## LINEAR REGRESSION\n", + " modelLR = LinearRegression()\n", + " modelLR.fit(XX, YY)\n", + " yhat = modelLR.predict(xx)\n", + " u2 = denorm(pd.Series(np.squeeze(yhat)))\n", + " # calculate errors\n", + " error_LR, error_LR_max = mean_aep(u1,u2), max_aep(u1,u2)\n", + " ## k-Nearest Neighbors\n", + " modelkNN = KNeighborsRegressor()\n", + " modelkNN.fit(XX, YY)\n", + " yhat = modelkNN.predict(xx)\n", + " u2 = denorm(pd.Series(np.squeeze(yhat)))\n", + " # calculate errors\n", + " error_kNN, error_kNN_max = mean_aep(u1,u2), max_aep(u1,u2)\n", + " ## RANDOM FOREST \n", + " modelRF = RandomForestRegressor()\n", + " modelRF.fit(XX, YY)\n", + " yhat = modelRF.predict(xx)\n", + " u2 = denorm(pd.Series(np.squeeze(yhat)))\n", + " # calculate errors\n", + " error_RF, error_RF_max = mean_aep(u1,u2), max_aep(u1,u2)\n", + " # PRINT ERRORS\n", + " print('************',var_index,'************')\n", + " print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')\n", + " print(error_LR, error_kNN, error_RF, error_ANN)\n", + " print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')\n", + " print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)\n", + " backend.clear_session()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "FFtnWBnMmbAu" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file