ron-ligsay · darshan-y · Apr 9, 2023 · Apr 9, 2023 · Apr 9, 2023 · Apr 9, 2023
diff --git a/Q-Learning/README.md b/Q-Learning/README.md
@@ -0,0 +1 @@
+
diff --git a/Q-Learning/Shortest_distance.ipynb b/Q-Learning/Shortest_distance.ipynb
@@ -0,0 +1,205 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "047e17aa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#import libraries\n",
+    "import numpy as np\n",
+    "import random"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c060edaf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#create an environment for the game\n",
+    "class GameEnvironment():\n",
+    "    def __init__(self, grid, start_state=(0, 0)):\n",
+    "        self.grid = grid\n",
+    "        self.rows, self.cols = grid.shape\n",
+    "        self.start_state = start_state\n",
+    "        self.num_turns = 0\n",
+    "        \n",
+    "    def get_reward(self): #return reward obtained in current state\n",
+    "        if self.grid[self.state[0]][self.state[1]] == 1:\n",
+    "            return 1\n",
+    "        else:\n",
+    "            return -1\n",
+    "    \n",
+    "    def is_valid(self, state):   # True if state lies inside the grid bounds and is not a wall (NaN cell)\n",
+    "        if state[0] < 0 or state[0] >= self.rows:\n",
+    "            # outside row bound\n",
+    "            return False\n",
+    "        elif state[1] < 0 or state[1] >= self.cols:\n",
+    "            # outside column bound\n",
+    "            return False\n",
+    "        elif np.isnan(self.grid[state[0]][state[1]]):\n",
+    "            # wall\n",
+    "            return False\n",
+    "        return True\n",
+    "    def get_next_state(self, action):    # return the state reached by action, or the current state if the move is invalid (self.state is not modified)\n",
+    "        if action == \"up\":\n",
+    "            next_state = (self.state[0]-1, self.state[1])\n",
+    "        elif action == \"down\":\n",
+    "            next_state = (self.state[0]+1, self.state[1])\n",
+    "        elif action == \"right\":\n",
+    "            next_state = (self.state[0], self.state[1]+1)\n",
+    "        elif action == \"left\":\n",
+    "            next_state = (self.state[0], self.state[1]-1)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid action!\")\n",
+    "        if self.is_valid(next_state):\n",
+    "            return next_state\n",
+    "        else:\n",
+    "            return self.state\n",
+    "        \n",
+    "    def reset(self):   #reset states\n",
+    "        self.state = self.start_state\n",
+    "        self.num_turns = 0\n",
+    "    \n",
+    "    def is_end(self):     # Returns True once the goal cell (value 1) is reached or num_turns hits 100\n",
+    "        val = self.grid[self.state[0]][self.state[1]]\n",
+    "        return val == 1 or self.num_turns == 100"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "69ce1d67",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#creating an Agent\n",
+    "class CreateAgent():\n",
+    "    def __init__(self, game):\n",
+    "        self.game = game\n",
+    "        self.lr = 0.2\n",
+    "        self.explore_rate = 0.1\n",
+    "        self.gamma = 0.95\n",
+    "        self.actions = [\"up\", \"down\", \"left\", \"right\"]\n",
+    "        \n",
+    "        self.state_values = {}\n",
+    "        for i in range(self.game.rows):\n",
+    "            for j in range(self.game.cols):\n",
+    "                self.state_values[(i, j)] = 0\n",
+    "                \n",
+    "    def get_valid_actions(self):    #Get a list of valid actions from current position\n",
+    "        valid = []\n",
+    "        for action in self.actions:\n",
+    "            state = self.game.get_next_state(action)\n",
+    "            if state != self.game.state:\n",
+    "                valid.append(action)\n",
+    "        return valid\n",
+    "        \n",
+    "    \n",
+    "    def select_action(self):\n",
+    "        valid_actions = self.get_valid_actions()\n",
+    "        # Shuffle so ties are broken at random: without it the agent may follow the same path\n",
+    "        # over and over again (in list order) and never reach the global minimum (shortest path)\n",
+    "        random.shuffle(valid_actions) \n",
+    "        if np.random.uniform(0, 1) <= self.explore_rate:\n",
+    "            action = np.random.choice(valid_actions)\n",
+    "        else:\n",
+    "            max_reward = -np.inf\n",
+    "            action = \"\"\n",
+    "            for a in valid_actions:\n",
+    "                exp_reward = self.state_values[self.game.get_next_state(a)]\n",
+    "                if exp_reward >= max_reward:\n",
+    "                    max_reward = exp_reward\n",
+    "                    action = a\n",
+    "        return action\n",
+    "    \n",
+    "    def play_step(self, max_iter=1000):\n",
+    "        self.game.reset()\n",
+    "        it = 0\n",
+    "        path = []\n",
+    "        path.append(self.game.state)\n",
+    "        while True:\n",
+    "            action = self.select_action()\n",
+    "            self.game.state = self.game.get_next_state(action)\n",
+    "            self.game.num_turns += 1\n",
+    "            path.append(self.game.state)\n",
+    "            \n",
+    "            if self.game.is_end():\n",
+    "                reward = self.game.get_reward()\n",
+    "                for state in reversed(path):\n",
+    "                    self.state_values[state] += self.lr * (reward-self.state_values[state])\n",
+    "                    reward *= self.gamma\n",
+    "                return path\n",
+    "    \n",
+    "    def play(self, ite):\n",
+    "        for _ in range(ite):\n",
+    "            path = self.play_step()\n",
+    "            self.explore_rate *= 0.9\n",
+    "        print('Shortest path found with length:', len(path))\n",
+    "        print(path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "68045a1f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Shortest path found with length: 8\n",
+      "[(0, 0), (1, 0), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 4)]\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Testing\n",
+    "N = np.nan\n",
+    "grid = np.array([\n",
+    "    [0, N, 0, 0, N, 0], \n",
+    "    [0, N, 0, 0, N, 0], \n",
+    "    [0, 0, 0, 0, 0, 0], \n",
+    "    [0, 0, N, 0, 1, 0]\n",
+    "])\n",
+    "start_state=(0, 0)\n",
+    "game = GameEnvironment(grid, start_state)\n",
+    "agent = CreateAgent(game)\n",
+    "agent.play(100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6d140146",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}