diff --git "a/20191020-AM-Keras-MNIST-\346\211\213\345\257\253\350\276\250\350\255\230-Starter-\350\244\207\347\277\222.ipynb" "b/20191020-AM-Keras-MNIST-\346\211\213\345\257\253\350\276\250\350\255\230-Starter-\350\244\207\347\277\222.ipynb" new file mode 100644 index 0000000..0f2318d --- /dev/null +++ "b/20191020-AM-Keras-MNIST-\346\211\213\345\257\253\350\276\250\350\255\230-Starter-\350\244\207\347\277\222.ipynb" @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 如何知道有多少數據\n", + "from keras import datasets\n", + "from keras.utils import to_categorical\n", + "from keras.datasets import mnist\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# tuple (a, b)\n", + "# list [a, b]\n", + "# dict {'a':b}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mnist.load_data()[1][1].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 數據是28 * 28 = 784\n", + "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", + "X_train.shape, y_train.shape, X_test.shape, y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 圖片\n", + "X_train[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 畫一張\n", + "%matplotlib inline\n", + "plt.figure(figsize=(3/4, 3/4))\n", + "plt.imshow(X_train[0], cmap='gray_r')\n", + "plt.axis('off')\n", + "plt.show()\n", + "\n", + "# '''\n", + "# ValueError: Colormap xxx is not recognized. 
Possible values are: \n", + "# Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu, BuPu_r, CMRmap, CMRmap_r, Dark2, Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, PRGn, PRGn_r, Paired, Paired_r, Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples, Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2, Set2_r, Set3, Set3_r, Spectral, Spectral_r, Wistia, Wistia_r, YlGn, YlGnBu, YlGnBu_r, YlGn_r, YlOrBr, YlOrBr_r, YlOrRd, YlOrRd_r, afmhot, afmhot_r, autumn, autumn_r, binary, binary_r, bone, bone_r, brg, brg_r, bwr, bwr_r, cividis, cividis_r, cool, cool_r, coolwarm, coolwarm_r, copper, copper_r, cubehelix, cubehelix_r, flag, flag_r, gist_earth, gist_earth_r, gist_gray, gist_gray_r, gist_heat, gist_heat_r, gist_ncar, gist_ncar_r, gist_rainbow, gist_rainbow_r, gist_stern, gist_stern_r, gist_yarg, gist_yarg_r, gnuplot, gnuplot2, gnuplot2_r, gnuplot_r, gray, gray_r, hot, hot_r, hsv, hsv_r, inferno, inferno_r, jet, jet_r, magma, magma_r, nipy_spectral, nipy_spectral_r, ocean, ocean_r, pink, pink_r, plasma, plasma_r, prism, prism_r, rainbow, rainbow_r, seismic, seismic_r, spring, spring_r, summer, summer_r, tab10, tab10_r, tab20, tab20_r, tab20b, tab20b_r, tab20c, tab20c_r, terrain, terrain_r, twilight, twilight_r, twilight_shifted, twilight_shifted_r, viridis, viridis_r, winter, winter_r'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# data pre-processing\n", + "X_train, X_test = X_train.astype(np.float32).reshape(60000, 784), X_test.astype(np.float32).reshape(10000, 784)\n", + "# one_hot encoding\n", + "y_train_oh, y_test_oh = to_categorical(y_train), to_categorical(y_test) " + ] + 
}, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_train[0] , y_train_oh[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build model Sequential = 底座 , Dense : 神經層 , optimizers 找出所有參數的最小誤差(loss) adam sgd \n", + "# activation feedforward 計算過程 判斷神經是否可以從上一層 到 下一層 relu sigmoid softmax\n", + "# 60000 : 筆數 , (28, 28) :每一張相片(row,col), 784 : flatten of (28, 28)\n", + "# 60000 : 攸關到 batch_size 產生要訓練幾次\n", + "# 784 : input layer features(dim)\n", + "# 1000 : 1st hidden layer units \n", + "# 500 : 2nd hidden layer units\n", + "# 10 : output layer units\n", + "# epochs: 針對那全部的六萬(60000)每一次批次,要循環幾次,訓練上述的參數 1,290,510\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense\n", + "from keras import optimizers\n", + "model = Sequential()\n", + "model.add( Dense(units=1000, activation='relu', input_shape=(784,) ) )\n", + "model.add( Dense(units=500, activation='relu') )\n", + "model.add( Dense(units=10, activation='softmax') )\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# train model \n", + "# ValueError: Error when checking target: expected dense_3 to have 2 dimensions\n", + "# but got array with shape (60000, 10, 2, 2)\n", + "# ValueError: Error when checking target: expected dense_6 to have shape (10,) but got array with shape (1,)\n", + "\n", + "model.compile(optimizer=optimizers.Adam(lr=0.01), loss='categorical_crossentropy', metrics=['accuracy'] )\n", + "history = model.fit(X_train, y_train_oh, validation_split=0.1, epochs=10, verbose=1, batch_size= 1000)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot\n", + "plt.title('Accuracy of Keras')\n", + "plt.plot(history.history['acc'], color='red')\n", + "plt.plot(history.history['val_acc'], color='blue')\n", + 
"plt.grid()\n", + "plt.show()\n", + "\n", + "\n", + "plt.title('Loss of Keras')\n", + "plt.plot(history.history['loss'], color='red')\n", + "plt.plot(history.history['val_loss'], color='blue')\n", + "plt.grid()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# evaluate model \n", + "print(model.evaluate(X_train, y_train_oh), model.evaluate(X_test, y_test_oh) )\n", + "# predict model \n", + "y_pred = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# top 10 \n", + "# 請你找一下那些數字是錯誤 slicing \n", + "index_error = np.argmax(y_pred, axis=1) != np.argmax(y_test_oh, axis=1)\n", + "\n", + "X_error = X_test[ index_error ]\n", + "y_error = y_test_oh[ index_error ]\n", + "y_error_pred = y_pred[ index_error ]\n", + "\n", + "print('Total error amount is {}'.format(X_error.shape[0]))\n", + "\n", + "# Plot for top 10 [:10]\n", + "for i in range(10):\n", + " plt.figure(figsize=(1,1))\n", + " plt.title('GT:{} --> Pred:{}'.format(np.argmax(y_error[i]), np.argmax(y_error_pred[i])) )\n", + " plt.imshow(X_error[i].reshape(28,28), cmap='gray' )\n", + " plt.axis('off')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}