committed by
GitHub
1 changed files with 255 additions and 0 deletions
@ -0,0 +1,255 @@ |
|||
{ |
|||
"nbformat": 4, |
|||
"nbformat_minor": 0, |
|||
"metadata": { |
|||
"colab": { |
|||
"name": "MUTC1750_Predict_Metrics.ipynb", |
|||
"private_outputs": true, |
|||
"provenance": [], |
|||
"collapsed_sections": [] |
|||
}, |
|||
"kernelspec": { |
|||
"name": "python3", |
|||
"display_name": "Python 3" |
|||
}, |
|||
"language_info": { |
|||
"name": "python" |
|||
}, |
|||
"accelerator": "GPU" |
|||
}, |
|||
"cells": [ |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "H6eOwX3UGpvG" |
|||
}, |
|||
"source": [ |
|||
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# ML models
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
# Multilayer Perceptron — import everything from the tensorflow.keras
# namespace: the legacy paths `keras.layers.merge` and
# `keras.layers.advanced_activations` were removed in modern Keras
# releases and raise ImportError there. All previously imported names
# (Model, Input, Dense, Dropout, concatenate, LeakyReLU, PReLU,
# optimizers, backend) remain in scope for the cells below.
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, concatenate
from tensorflow.keras.layers import LeakyReLU, PReLU
from tensorflow.keras import optimizers
from tensorflow.keras import backend
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "xO6mZcSAGyp7" |
|||
}, |
|||
"source": [ |
|||
# Fetch the dataset repository and load the MUTC photodetector design table.
# NOTE(review): re-running this cell after the first clone fails because
# `git clone` refuses to overwrite ./photodetectors — consider guarding
# with a directory-existence check for Restart-and-Run-All friendliness.
!git clone https://github.com/simsekergun/photodetectors.git
df = pd.read_csv("./photodetectors/MUTC1750designs.csv")
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "1qLZ_FkrHJJB" |
|||
}, |
|||
"source": [ |
|||
# Doping-level columns span many orders of magnitude; take log10 so every
# feature lives in a comparable numeric range before normalization.
doping_cols = df.columns[22:40]
df[doping_cols] = np.log10(df[doping_cols])
df.shape
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "x5h23R0y1MXk" |
|||
}, |
|||
"source": [ |
|||
# Peek at the first rows to sanity-check the columns and the log10 transform.
df.head(3)
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "iyFCz4P3xSgt" |
|||
}, |
|||
"source": [ |
|||
# Summary statistics for every column (count, mean, std, quartiles).
df.describe()
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "qBs0SZEek_Sh" |
|||
}, |
|||
"source": [ |
|||
## Coefficient of Variation (std/mean) of the five target metrics.
## NOTE(review): the original comment said "Coefficient of Correlation",
## but std()/mean() is the coefficient of variation (relative spread).
df[df.columns[0:5]].std()/df[df.columns[0:5]].mean()
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "lnIsvKeLIwZY" |
|||
}, |
|||
"source": [ |
|||
# Helper functions to normalize / de-normalize data and to report
# absolute percentage errors.
# NOTE(review): normx/norm/denorm read the module-level frames
# train_statsX / train_statsY, which are (re)assigned inside the training
# loop below — they must exist before these helpers are called.

def normx(x):
    """Standardize features x with the training-set mean and std."""
    return (x - train_statsX['mean']) / train_statsX['std']

def norm(y):
    """Standardize targets y with the training-set mean and std."""
    return (y - train_statsY['mean']) / train_statsY['std']

def denorm(y):
    """Invert norm(): map standardized targets back to physical units."""
    return y * train_statsY['std'] + train_statsY['mean']

def mean_aep(u1, u2):
    """Mean absolute error percentage of predictions u2 vs. truth u1,
    rounded to two decimal places. Assumes no zero entries in u1."""
    ape = abs((u2 - u1) / u1)
    return round(100 * (100 * sum(ape) / len(u1))) / 100

def max_aep(u1, u2):
    """Maximum absolute error percentage of u2 vs. truth u1,
    rounded to two decimal places. Assumes no zero entries in u1."""
    worst = max(abs((u2 - u1) / u1))
    return round(100 * (100 * worst)) / 100
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "nAXW1rSrkJon" |
|||
}, |
|||
"source": [ |
|||
# ANN hyperparameters, shared by the training loop below.
ac = 'relu' # activation function for every hidden Dense layer
nnno = 48 # number of neurons per hidden layer
dr_rate = 0.2 # dropout rate applied after each hidden layer
EPOCHS = 400 # number of training epochs
LR = 0.001 # learning rate for the Adamax optimizer
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "T9UuySubf9K9" |
|||
}, |
|||
"source": [ |
|||
# For each of the five target metrics (columns 0-4) train the skip-connection
# ANN plus three classical baselines (Linear Regression, kNN, Random Forest)
# on the design features (columns 5-39), then compare absolute percentage
# errors across the four models.
# NOTE(review): `df.iloc[0:-1, ...]` silently excludes the LAST row of the
# dataset from both splits — confirm this is intentional and not an
# off-by-one (use `df.iloc[:, ...]` to keep every row).
for var_index in np.arange(5):
    X_Train, X_Test, Y_Train, Y_Test = train_test_split(df.iloc[0:-1,5:40],df.iloc[0:-1,var_index], test_size=0.2, random_state=55)

    # Normalization statistics come from the TRAINING split only; the
    # module-level helpers normx/norm/denorm read these two frames.
    train_statsY = Y_Train.describe().transpose()
    train_statsX = X_Train.describe().transpose()
    XX = normx(X_Train)
    YY = norm(Y_Train)
    xx = normx(X_Test)
    yy = norm(Y_Test)
    # Functional-API ANN: hidden Dense blocks with dropout, and two skip
    # connections concatenating the raw input back into the network.
    visible = Input(shape=(len(X_Train.keys()),))
    hidden1 = Dense(nnno, activation=ac)(visible)
    hidden1 = Dropout(dr_rate)(hidden1)
    hidden2 = Dense(nnno, activation=ac)(hidden1)
    hidden2 = Dropout(dr_rate)(hidden2)
    mergeA = concatenate([hidden2, visible])
    hiddenB = Dense(nnno, activation=ac)(mergeA)
    hiddenB = Dropout(dr_rate)(hiddenB)
    hidden3 = Dense(nnno, activation=ac)(hiddenB)
    hidden3 = Dropout(dr_rate)(hidden3)
    merge = concatenate([hidden3, visible])
    hidden4 = Dense(nnno, activation=ac)(merge)
    hidden4 = Dropout(dr_rate)(hidden4)
    predicted_value = Dense(1)(hidden4)  # single regression output
    modelANN = Model(inputs=visible, outputs=predicted_value)
    # Train on the normalized targets: MSE loss, Adamax optimizer.
    opt = optimizers.Adamax(learning_rate=LR)
    modelANN.compile(optimizer=opt, loss=['mse'])
    history = modelANN.fit(XX, YY,epochs=EPOCHS, validation_data = (xx,yy), verbose=0)
    # plot training/validation losses per epoch
    plt.figure(var_index+10)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    plt.show()
    # De-normalize truth and predictions back to physical units before
    # computing percentage errors. u1 (ground truth) is set once here and
    # reused as the reference for all four models; u2 is overwritten per model.
    test_predictions = modelANN.predict(xx)
    u1 = denorm(yy).to_numpy()
    u2 = denorm(pd.Series(np.squeeze(test_predictions)))
    # plot truth vs. prediction for the ANN
    x1 = min(min(u1),min(u2))
    x2 = max(max(u1),max(u2))
    plt.figure(var_index)
    plt.plot([x1,x2],[x1,x2],color='red')
    plt.scatter(u1, u2)
    plt.xlabel('Ground Truth')
    plt.ylabel('Prediction')
    plt.gca().set_aspect('equal', adjustable='box')
    plt.grid(color='grey', linestyle='--', linewidth=1)
    # ANN errors (must be computed before u2 is reused by the baselines)
    error_ANN, error_ANN_max = mean_aep(u1,u2), max_aep(u1,u2) 
    # Save ANN Results — only for the first target metric
    if var_index == 0:
        np.savetxt("MUTC_training_loss.csv", history.history['loss'], delimiter=",")
        np.savetxt("MUTC_testing_loss.csv", history.history['val_loss'], delimiter=",")
        np.savetxt("MUTC_phasenoise_truth.csv", u1, delimiter=",")
        np.savetxt("MUTC_phasenoise_predictions.csv", u2, delimiter=",")
    ## LINEAR REGRESSION (fit on the same normalized data as the ANN)
    modelLR = LinearRegression()
    modelLR.fit(XX, YY)
    yhat = modelLR.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)))
    # calculate errors
    error_LR, error_LR_max = mean_aep(u1,u2), max_aep(u1,u2)
    ## k-Nearest Neighbors (scikit-learn defaults, k=5)
    modelkNN = KNeighborsRegressor()
    modelkNN.fit(XX, YY)
    yhat = modelkNN.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)))
    # calculate errors
    error_kNN, error_kNN_max = mean_aep(u1,u2), max_aep(u1,u2)
    ## RANDOM FOREST 
    # NOTE(review): no random_state is set, so RF errors vary between runs.
    modelRF = RandomForestRegressor()
    modelRF.fit(XX, YY)
    yhat = modelRF.predict(xx)
    u2 = denorm(pd.Series(np.squeeze(yhat)))
    # calculate errors
    error_RF, error_RF_max = mean_aep(u1,u2), max_aep(u1,u2)
    # PRINT ERRORS for this target metric
    print('************',var_index,'************')
    print('Mean Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR, error_kNN, error_RF, error_ANN)
    print('Max Absolute Percentage Errors: LR, kNN, RF, ANN')
    print(error_LR_max, error_kNN_max, error_RF_max, error_ANN_max)
    # Release the Keras computation graph before the next iteration
    backend.clear_session()
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
}, |
|||
{ |
|||
"cell_type": "code", |
|||
"metadata": { |
|||
"id": "FFtnWBnMmbAu" |
|||
}, |
|||
"source": [ |
|||
"" |
|||
], |
|||
"execution_count": null, |
|||
"outputs": [] |
|||
} |
|||
] |
|||
} |
Loading…
Reference in new issue