Cradicle Explorer

/ ml / dqn / deepq_bot.ipynb
deepq_bot.ipynb
1  {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Untitled0.ipynb","version":"0.3.2","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"ntekOXOvdoDQ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":191},"outputId":"ddff18b0-5854-4d09-9424-f2e9982c683b","executionInfo":{"status":"ok","timestamp":1545346686615,"user_tz":300,"elapsed":82495,"user":{"displayName":"Jack Buttimer","photoUrl":"","userId":"00346238941589315254"}}},"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive/')"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/drive/\n"],"name":"stdout"}]},{"metadata":{"id":"U9qabpE4mq9U","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"zCCc5p3eg7qI","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":2689},"outputId":"2c1f66fc-119c-4f23-aa84-5023917319ac","executionInfo":{"status":"ok","timestamp":1545346696334,"user_tz":300,"elapsed":92116,"user":{"displayName":"Jack Buttimer","photoUrl":"","userId":"00346238941589315254"}}},"cell_type":"code","source":["!pip install baselines"],"execution_count":2,"outputs":[{"output_type":"stream","text":["Collecting baselines\n","\u001b[?25l  Downloading 
https://files.pythonhosted.org/packages/f1/bd/d7695f0e5649658b43eabf10d1efa11c70a30ce532faef994c8b7172a744/baselines-0.1.5.tar.gz (123kB)\n","\u001b[K    100% |████████████████████████████████| 133kB 4.5MB/s \n","\u001b[?25hRequirement already satisfied: gym[atari,classic_control,mujoco,robotics] in /usr/local/lib/python3.6/dist-packages (from baselines) (0.10.9)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.6/dist-packages (from baselines) (1.1.0)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.6/dist-packages (from baselines) (4.28.1)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.6/dist-packages (from baselines) (0.13.0)\n","Requirement already satisfied: zmq in /usr/local/lib/python3.6/dist-packages (from baselines) (0.0.0)\n","Requirement already satisfied: dill in /usr/local/lib/python3.6/dist-packages (from baselines) (0.2.8.2)\n","Requirement already satisfied: progressbar2 in /usr/local/lib/python3.6/dist-packages (from baselines) (3.38.0)\n","Requirement already satisfied: mpi4py in /usr/local/lib/python3.6/dist-packages (from baselines) (3.0.0)\n","Requirement already satisfied: cloudpickle in /usr/local/lib/python3.6/dist-packages (from baselines) (0.6.1)\n","Requirement already satisfied: tensorflow>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from baselines) (1.12.0)\n","Requirement already satisfied: click in /usr/local/lib/python3.6/dist-packages (from baselines) (7.0)\n","Requirement already satisfied: numpy>=1.10.4 in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (1.14.6)\n","Requirement already satisfied: requests>=2.0 in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (2.18.4)\n","Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (1.11.0)\n","Requirement already satisfied: pyglet>=1.2.0 in 
/usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (1.3.2)\n","Requirement already satisfied: atari_py>=0.1.4 in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (0.1.7)\n","Requirement already satisfied: Pillow in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (4.0.0)\n","Requirement already satisfied: PyOpenGL in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (3.1.0)\n","Collecting mujoco_py>=1.50 (from gym[atari,classic_control,mujoco,robotics]->baselines)\n","\u001b[?25l  Downloading https://files.pythonhosted.org/packages/cf/8c/64e0630b3d450244feef0688d90eab2448631e40ba6bdbd90a70b84898e7/mujoco-py-1.50.1.68.tar.gz (120kB)\n","\u001b[K    100% |████████████████████████████████| 122kB 10.2MB/s \n","\u001b[?25hRequirement already satisfied: imageio in /usr/local/lib/python3.6/dist-packages (from gym[atari,classic_control,mujoco,robotics]->baselines) (2.4.1)\n","Requirement already satisfied: pyzmq in /usr/local/lib/python3.6/dist-packages (from zmq->baselines) (17.0.0)\n","Requirement already satisfied: python-utils>=2.3.0 in /usr/local/lib/python3.6/dist-packages (from progressbar2->baselines) (2.3.0)\n","Requirement already satisfied: absl-py>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (0.6.1)\n","Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (0.7.1)\n","Requirement already satisfied: keras-preprocessing>=1.0.5 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (1.0.5)\n","Requirement already satisfied: keras-applications>=1.0.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (1.0.6)\n","Requirement already satisfied: protobuf>=3.6.1 in /usr/local/lib/python3.6/dist-packages (from 
tensorflow>=1.4.0->baselines) (3.6.1)\n","Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (1.15.0)\n","Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (0.32.3)\n","Requirement already satisfied: tensorboard<1.13.0,>=1.12.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (1.12.1)\n","Requirement already satisfied: gast>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (0.2.0)\n","Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow>=1.4.0->baselines) (1.1.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests>=2.0->gym[atari,classic_control,mujoco,robotics]->baselines) (2018.11.29)\n","Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests>=2.0->gym[atari,classic_control,mujoco,robotics]->baselines) (1.22)\n","Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests>=2.0->gym[atari,classic_control,mujoco,robotics]->baselines) (2.6)\n","Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests>=2.0->gym[atari,classic_control,mujoco,robotics]->baselines) (3.0.4)\n","Requirement already satisfied: future in /usr/local/lib/python3.6/dist-packages (from pyglet>=1.2.0->gym[atari,classic_control,mujoco,robotics]->baselines) (0.16.0)\n","Requirement already satisfied: olefile in /usr/local/lib/python3.6/dist-packages (from Pillow->gym[atari,classic_control,mujoco,robotics]->baselines) (0.46)\n","Collecting glfw>=1.4.0 (from mujoco_py>=1.50->gym[atari,classic_control,mujoco,robotics]->baselines)\n","  Downloading 
https://files.pythonhosted.org/packages/f3/dd/fd5c662d7a07fca96aaffab585acfa4c624a2400f91a5bc1ebbb514dd4f9/glfw-1.7.0.tar.gz\n","Requirement already satisfied: Cython>=0.27.2 in /usr/local/lib/python3.6/dist-packages (from mujoco_py>=1.50->gym[atari,classic_control,mujoco,robotics]->baselines) (0.29.2)\n","Requirement already satisfied: cffi>=1.10 in /usr/local/lib/python3.6/dist-packages (from mujoco_py>=1.50->gym[atari,classic_control,mujoco,robotics]->baselines) (1.11.5)\n","Collecting lockfile>=0.12.2 (from mujoco_py>=1.50->gym[atari,classic_control,mujoco,robotics]->baselines)\n","  Downloading https://files.pythonhosted.org/packages/c8/22/9460e311f340cb62d26a38c419b1381b8593b0bb6b5d1f056938b086d362/lockfile-0.12.2-py2.py3-none-any.whl\n","Requirement already satisfied: h5py in /usr/local/lib/python3.6/dist-packages (from keras-applications>=1.0.6->tensorflow>=1.4.0->baselines) (2.8.0)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.6.1->tensorflow>=1.4.0->baselines) (40.6.3)\n","Requirement already satisfied: werkzeug>=0.11.10 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.13.0,>=1.12.0->tensorflow>=1.4.0->baselines) (0.14.1)\n","Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.13.0,>=1.12.0->tensorflow>=1.4.0->baselines) (3.0.1)\n","Requirement already satisfied: pycparser in /usr/local/lib/python3.6/dist-packages (from cffi>=1.10->mujoco_py>=1.50->gym[atari,classic_control,mujoco,robotics]->baselines) (2.19)\n","Building wheels for collected packages: baselines, mujoco-py, glfw\n","  Running setup.py bdist_wheel for baselines ... \u001b[?25l-\b \b\\\b \bdone\n","\u001b[?25h  Stored in directory: /root/.cache/pip/wheels/c3/95/d4/dc08613e714458fb368c44948025dade91a37db5932faa058f\n","  Running setup.py bdist_wheel for mujoco-py ... 
\u001b[?25l-\b \b\\\b \berror\n","  Complete output from command /usr/bin/python3 -u -c \"import setuptools, tokenize;__file__='/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" bdist_wheel -d /tmp/pip-wheel-nqo9qjg2 --python-tag cp36:\n","  running bdist_wheel\n","  running build\n","  Traceback (most recent call last):\n","    File \"<string>\", line 1, in <module>\n","    File \"/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py\", line 44, in <module>\n","      tests_require=read_requirements_file('requirements.dev.txt'),\n","    File \"/usr/local/lib/python3.6/dist-packages/setuptools/__init__.py\", line 143, in setup\n","      return distutils.core.setup(**attrs)\n","    File \"/usr/lib/python3.6/distutils/core.py\", line 148, in setup\n","      dist.run_commands()\n","    File \"/usr/lib/python3.6/distutils/dist.py\", line 955, in run_commands\n","      self.run_command(cmd)\n","    File \"/usr/lib/python3.6/distutils/dist.py\", line 974, in run_command\n","      cmd_obj.run()\n","    File \"/usr/local/lib/python3.6/dist-packages/wheel/bdist_wheel.py\", line 188, in run\n","      self.run_command('build')\n","    File \"/usr/lib/python3.6/distutils/cmd.py\", line 313, in run_command\n","      self.distribution.run_command(command)\n","    File \"/usr/lib/python3.6/distutils/dist.py\", line 974, in run_command\n","      cmd_obj.run()\n","    File \"/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py\", line 28, in run\n","      import mujoco_py  # noqa: force build\n","    File \"/tmp/pip-install-l9s1ruzo/mujoco-py/mujoco_py/__init__.py\", line 3, in <module>\n","      from mujoco_py.builder import cymj, ignore_mujoco_warnings, functions, MujocoException\n","    File \"/tmp/pip-install-l9s1ruzo/mujoco-py/mujoco_py/builder.py\", line 20, in <module>\n","      from lockfile import LockFile\n","  ModuleNotFoundError: No module named 'lockfile'\n"," 
 \n","  ----------------------------------------\n","\u001b[31m  Failed building wheel for mujoco-py\u001b[0m\n","\u001b[?25h  Running setup.py clean for mujoco-py\n","  Running setup.py bdist_wheel for glfw ... \u001b[?25l-\b \bdone\n","\u001b[?25h  Stored in directory: /root/.cache/pip/wheels/ca/aa/05/9b802212ed86ef800f54025059bac9e64f58dfaf17fb97e94e\n","Successfully built baselines glfw\n","Failed to build mujoco-py\n","Installing collected packages: baselines, glfw, lockfile, mujoco-py\n","  Running setup.py install for mujoco-py ... \u001b[?25l-\b \b\\\b \berror\n","    Complete output from command /usr/bin/python3 -u -c \"import setuptools, tokenize;__file__='/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" install --record /tmp/pip-record-l44vafta/install-record.txt --single-version-externally-managed --compile:\n","    running install\n","    running build\n","    \n","    You appear to be missing MuJoCo.  
We expected to find the file here: /root/.mujoco/mjpro150\n","    \n","    This package only provides python bindings, the library must be installed separately.\n","    \n","    Please follow the instructions on the README to install MuJoCo\n","    \n","        https://github.com/openai/mujoco-py#install-mujoco\n","    \n","    Which can be downloaded from the website\n","    \n","        https://www.roboti.us/index.html\n","    \n","    Traceback (most recent call last):\n","      File \"<string>\", line 1, in <module>\n","      File \"/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py\", line 44, in <module>\n","        tests_require=read_requirements_file('requirements.dev.txt'),\n","      File \"/usr/local/lib/python3.6/dist-packages/setuptools/__init__.py\", line 143, in setup\n","        return distutils.core.setup(**attrs)\n","      File \"/usr/lib/python3.6/distutils/core.py\", line 148, in setup\n","        dist.run_commands()\n","      File \"/usr/lib/python3.6/distutils/dist.py\", line 955, in run_commands\n","        self.run_command(cmd)\n","      File \"/usr/lib/python3.6/distutils/dist.py\", line 974, in run_command\n","        cmd_obj.run()\n","      File \"/usr/local/lib/python3.6/dist-packages/setuptools/command/install.py\", line 61, in run\n","        return orig.install.run(self)\n","      File \"/usr/lib/python3.6/distutils/command/install.py\", line 589, in run\n","        self.run_command('build')\n","      File \"/usr/lib/python3.6/distutils/cmd.py\", line 313, in run_command\n","        self.distribution.run_command(command)\n","      File \"/usr/lib/python3.6/distutils/dist.py\", line 974, in run_command\n","        cmd_obj.run()\n","      File \"/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py\", line 28, in run\n","        import mujoco_py  # noqa: force build\n","      File \"/tmp/pip-install-l9s1ruzo/mujoco-py/mujoco_py/__init__.py\", line 3, in <module>\n","        from mujoco_py.builder import cymj, ignore_mujoco_warnings, functions, 
MujocoException\n","      File \"/tmp/pip-install-l9s1ruzo/mujoco-py/mujoco_py/builder.py\", line 502, in <module>\n","        mjpro_path, key_path = discover_mujoco()\n","      File \"/tmp/pip-install-l9s1ruzo/mujoco-py/mujoco_py/utils.py\", line 93, in discover_mujoco\n","        raise Exception(message)\n","    Exception:\n","    You appear to be missing MuJoCo.  We expected to find the file here: /root/.mujoco/mjpro150\n","    \n","    This package only provides python bindings, the library must be installed separately.\n","    \n","    Please follow the instructions on the README to install MuJoCo\n","    \n","        https://github.com/openai/mujoco-py#install-mujoco\n","    \n","    Which can be downloaded from the website\n","    \n","        https://www.roboti.us/index.html\n","    \n","    \n","    ----------------------------------------\n","\u001b[31mCommand \"/usr/bin/python3 -u -c \"import setuptools, tokenize;__file__='/tmp/pip-install-l9s1ruzo/mujoco-py/setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\\r\\n', '\\n');f.close();exec(compile(code, __file__, 'exec'))\" install --record /tmp/pip-record-l44vafta/install-record.txt --single-version-externally-managed --compile\" failed with error code 1 in /tmp/pip-install-l9s1ruzo/mujoco-py/\u001b[0m\n","\u001b[?25h"],"name":"stdout"}]},{"metadata":{"id":"GpuVlNUzg8kw","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":542},"outputId":"91996bb4-9b5f-4953-da6c-2c826f317d25","executionInfo":{"status":"ok","timestamp":1545346722224,"user_tz":300,"elapsed":117933,"user":{"displayName":"Jack Buttimer","photoUrl":"","userId":"00346238941589315254"}}},"cell_type":"code","source":["!pip install zstd"],"execution_count":3,"outputs":[{"output_type":"stream","text":["Collecting zstd\n","\u001b[?25l  Downloading https://files.pythonhosted.org/packages/08/ea/693d977411af8f9a43aee43a092e8b30542cdb9c5a326739ff844549cf93/zstd-1.3.5.1.tar.gz (423kB)\n","\u001b[K    100% 
|████████████████████████████████| 430kB 10.6MB/s \n","\u001b[?25hBuilding wheels for collected packages: zstd\n","  Running setup.py bdist_wheel for zstd ... \u001b[?25l-\b \b\\\b \b|\b \b/\b \b-\b \b\\\b \b|\b \b/\b \b-\b \b\\\b \b|\b \b/\b \b-\b \b\\\b \b|\b \b/\b \b-\b \b\\\b \bdone\n","\u001b[?25h  Stored in directory: /root/.cache/pip/wheels/22/6f/7f/4c4c50243696356563946bb036b7b45b3463d5f0d588ca73a7\n","Successfully built zstd\n","Installing collected packages: zstd\n","Successfully installed zstd-1.3.5.1\n"],"name":"stdout"}]},{"metadata":{"id":"crakXDOUduat","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"73af6e1d-a682-4c52-b1e5-05f028469d4a","executionInfo":{"status":"ok","timestamp":1545346724410,"user_tz":300,"elapsed":120076,"user":{"displayName":"Jack Buttimer","photoUrl":"","userId":"00346238941589315254"}}},"cell_type":"code","source":["import tensorflow as tf\n","device_name = tf.test.gpu_device_name()\n","if device_name != '/device:GPU:0':\n","  raise SystemError('GPU device not found')\n","print('Found GPU at: {}'.format(device_name))"],"execution_count":4,"outputs":[{"output_type":"stream","text":["Found GPU at: /device:GPU:0\n"],"name":"stdout"}]},{"metadata":{"id":"hZcL8CtPd8GU","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":119},"outputId":"61f0c223-7ab3-4988-f5a2-c6c2a0f63d4e","executionInfo":{"status":"ok","timestamp":1545346737086,"user_tz":300,"elapsed":132710,"user":{"displayName":"Jack Buttimer","photoUrl":"","userId":"00346238941589315254"}}},"cell_type":"code","source":["import tensorflow as tf\n","import timeit\n","\n","# See https://www.tensorflow.org/tutorials/using_gpu#allowing_gpu_memory_growth\n","config = tf.ConfigProto()\n","config.gpu_options.allow_growth = True\n","\n","with tf.device('/cpu:0'):\n","  random_image_cpu = tf.random_normal((100, 100, 100, 3))\n","  net_cpu = tf.layers.conv2d(random_image_cpu, 32, 7)\n","  net_cpu = 
tf.reduce_sum(net_cpu)\n","\n","with tf.device('/gpu:0'):\n","  random_image_gpu = tf.random_normal((100, 100, 100, 3))\n","  net_gpu = tf.layers.conv2d(random_image_gpu, 32, 7)\n","  net_gpu = tf.reduce_sum(net_gpu)\n","\n","sess = tf.Session(config=config)\n","\n","# Test execution once to detect errors early.\n","try:\n","  sess.run(tf.global_variables_initializer())\n","except tf.errors.InvalidArgumentError:\n","  print(\n","      '\\n\\nThis error most likely means that this notebook is not '\n","      'configured to use a GPU.  Change this in Notebook Settings via the '\n","      'command palette (cmd/ctrl-shift-P) or the Edit menu.\\n\\n')\n","  raise\n","\n","def cpu():\n","  sess.run(net_cpu)\n","  \n","def gpu():\n","  sess.run(net_gpu)\n","  \n","# Runs the op several times.\n","print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '\n","      '(batch x height x width x channel). Sum of ten runs.')\n","print('CPU (s):')\n","cpu_time = timeit.timeit('cpu()', number=10, setup=\"from __main__ import cpu\")\n","print(cpu_time)\n","print('GPU (s):')\n","gpu_time = timeit.timeit('gpu()', number=10, setup=\"from __main__ import gpu\")\n","print(gpu_time)\n","print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))\n","\n","sess.close()"],"execution_count":5,"outputs":[{"output_type":"stream","text":["Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). 
Sum of ten runs.\n","CPU (s):\n","10.467305953999983\n","GPU (s):\n","1.958274517999996\n","GPU speedup over CPU: 5x\n"],"name":"stdout"}]},{"metadata":{"id":"DFD7lddxeDyo","colab_type":"code","colab":{}},"cell_type":"code","source":["\"\"\"\n","All viable commands that can be sent to the engine\n","\"\"\"\n","from enum import Enum\n","class commands(str, Enum):\n","    NORTH = 'n'\n","    SOUTH = 's'\n","    EAST = 'e'\n","    WEST = 'w'\n","    STAY_STILL = 'o'\n","    GENERATE = 'g'\n","    CONSTRUCT = 'c'\n","    MOVE = 'm'\n","\n","    def __str__(self):\n","        # Render a member as its raw engine character so that str.format() and\n","        # ' '.join() emit 'n', 'm', ... instead of 'commands.NORTH'.\n","        return self.value\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"aN1mLENpeEvA","colab_type":"code","colab":{}},"cell_type":"code","source":["class Direction:\n","    \"\"\"\n","    Holds positional tuples in relation to cardinal directions\n","    \"\"\"\n","    North = (0, -1)\n","    South = (0, 1)\n","    East = (1, 0)\n","    West = (-1, 0)\n","\n","    Still = (0, 0)\n","\n","    @staticmethod\n","    def get_all_cardinals():\n","        \"\"\"\n","        Returns all contained items in each cardinal\n","        :return: An array of cardinals\n","        \"\"\"\n","        return [Direction.North, Direction.South, Direction.East, Direction.West]\n","\n","    @staticmethod\n","    def convert(direction):\n","        \"\"\"\n","        Converts from this direction tuple notation to the engine's string notation\n","        :param direction: the direction in this notation\n","        :return: The character equivalent for the game engine\n","        \"\"\"\n","        if direction == Direction.North:\n","            return commands.NORTH\n","        if direction == Direction.South:\n","            return commands.SOUTH\n","        if direction == Direction.East:\n","            return commands.EAST\n","        if direction == Direction.West:\n","            return commands.WEST\n","        if direction == Direction.Still:\n","            return commands.STAY_STILL\n","        else:\n","            raise IndexError\n","\n","    @staticmethod\n","    
def invert(direction):\n","        \"\"\"\n","        Returns the opposite cardinal direction given a direction\n","        :param direction: The input direction\n","        :return: The opposite direction\n","        \"\"\"\n","        if direction == Direction.North:\n","            return Direction.South\n","        if direction == Direction.South:\n","            return Direction.North\n","        if direction == Direction.East:\n","            return Direction.West\n","        if direction == Direction.West:\n","            return Direction.East\n","        if direction == Direction.Still:\n","            return Direction.Still\n","        else:\n","            raise IndexError\n","\n","\n","class Position:\n","    def __init__(self, x, y):\n","        self.x = x\n","        self.y = y\n","\n","    def normalize(self, size):\n","        self.x = self.x % size\n","        self.y = self.y % size\n","\n","    def directional_offset(self, direction):\n","        \"\"\"\n","        Returns the position considering a Direction cardinal tuple\n","        :param direction: the direction cardinal tuple\n","        :return: a new position moved in that direction\n","        \"\"\"\n","        return self + Position(*direction)\n","\n","    def get_surrounding_cardinals(self):\n","        \"\"\"\n","        :return: Returns a list of all positions around this specific position in each cardinal direction\n","        \"\"\"\n","        return [self.directional_offset(current_direction) for current_direction in Direction.get_all_cardinals()]\n","\n","    def __add__(self, other):\n","        return Position(self.x + other.x, self.y + other.y)\n","\n","    def __sub__(self, other):\n","        return Position(self.x - other.x, self.y - other.y)\n","\n","    def __iadd__(self, other):\n","        self.x += other.x\n","        self.y += other.y\n","        return self\n","\n","    def __isub__(self, other):\n","        self.x -= other.x\n","        self.y -= other.y\n","       
 return self\n","\n","    def __abs__(self):\n","        return Position(abs(self.x), abs(self.y))\n","\n","    def __eq__(self, other):\n","        return self.x == other.x and self.y == other.y\n","\n","    def __ne__(self, other):\n","        return not self.__eq__(other)\n","\n","    def __repr__(self):\n","        return \"{}({}, {})\".format(self.__class__.__name__,\n","                                   self.x,\n","                                   self.y)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"rqrsf1yNeHCY","colab_type":"code","colab":{}},"cell_type":"code","source":["import sys\n","\n","class networking() :\n","    def send_commands(commands):\n","        \"\"\"\n","        Sends a list of commands to the engine.\n","        :param commands: The list of commands to send.\n","        :return: nothing.\n","        \"\"\"\n","        print(\" \".join(commands))\n","        sys.stdout.flush()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"99HxBGaPeI90","colab_type":"code","colab":{}},"cell_type":"code","source":["\"\"\"\n","Constants representing the game variation being played.\n","Most constants are global and come from game engine and are immutable and are strictly informational.\n","Some constants are only used by the local game client and so are mutable.\n","\"\"\"\n","class constants() :\n","    ################################################\n","    # Local and mutable constants.\n","\n","    \"\"\"Maximum number of steps to consider in pathfinding.\"\"\"\n","    MAX_BFS_STEPS = 1024  # = can search a 32x32 area completely\n","\n","    ################################################\n","    # Global and immutable constants.\n","\n","    \"\"\"The maximum amount of halite a ship can carry.\"\"\"\n","    MAX_HALITE = 1000\n","    \"\"\"The cost to build a single ship.\"\"\"\n","    SHIP_COST = 500\n","    \"\"\"The cost to build a dropoff.\"\"\"\n","    DROPOFF_COST = 2000\n","    \"\"\"The maximum number of turns a game can 
last.\"\"\"\n","    MAX_TURNS = 500\n","    \"\"\"1/EXTRACT_RATIO halite (rounded) is collected from a square per turn.\"\"\"\n","    EXTRACT_RATIO = 4\n","    \"\"\"1/MOVE_COST_RATIO halite (rounded) is needed to move off a cell.\"\"\"\n","    MOVE_COST_RATIO = 10\n","\n","    @classmethod\n","    def load_constants(cls, constants):\n","        \"\"\"\n","        Load constants from JSON given by the game engine.\n","\n","        Overrides are stored as class attributes, so later reads such as\n","        constants.MAX_HALITE see the engine's values; keys absent from the\n","        JSON keep their current value.\n","        :param constants: mapping of engine settings (only .get() is used)\n","        :return: nothing.\n","        \"\"\"\n","        cls.SHIP_COST = constants.get('NEW_ENTITY_ENERGY_COST', cls.SHIP_COST)\n","        cls.DROPOFF_COST = constants.get('DROPOFF_COST', cls.DROPOFF_COST)\n","        cls.MAX_HALITE = constants.get('MAX_ENERGY', cls.MAX_HALITE)\n","        cls.MAX_TURNS = constants.get('MAX_TURNS', cls.MAX_TURNS)\n","        cls.EXTRACT_RATIO = constants.get('EXTRACT_RATIO', cls.EXTRACT_RATIO)\n","        cls.MOVE_COST_RATIO = constants.get('MOVE_COST_RATIO', cls.MOVE_COST_RATIO)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ywKOvWkSeXT9","colab_type":"code","colab":{}},"cell_type":"code","source":["import math\n","import gym\n","from gym import spaces, logger\n","from gym.utils import seeding\n","import numpy as np\n","import random\n","\n","MAX_TURNS = 150\n","\n","# TODO: Cargo Limit\n","\n","'''\n","Simulates the Halite engine in an OpenAI gym environment\n","Allows for single-player, lone-ship games on 32x32 maps\n","Gamemap data is parsed from generated replay files\n","'''\n","class HaliteEnv(gym.Env):\n","    \n","    '''\n","    reward_type: 'immediate' or 'delayed'\n","        'immediate' -> reward is deposit amount, granted every deposit\n","        'delayed'   -> reward is total halite collected, granted at end of game\n","    '''\n","    def __init__(self, reward_type = 'immediate'):\n","        self.action_space = spaces.Discrete(5)\n","        self.observation_space = spaces.Box(low=0, high=255, shape=(3, 33, 33), dtype=np.uint8)\n","        self.seed()\n","        self.viewer = 
None\n","        \n","        self.reward_type = reward_type\n","        self.directions = [Direction.North, Direction.South, Direction.East, Direction.West, Direction.Still]\n","        #TODO: use parse file once no longer a jupyter notebook\n","        #self.maps = parse.load_maps('replays32')\n","        self.maps = load_maps('/content/drive/My Drive/replays32', 10)\n","        self.reset()\n","\n","    def seed(self, seed=None):\n","        self.np_random, seed = seeding.np_random(seed)\n","        return [seed]\n","\n","    def step(self, action):\n","        assert self.action_space.contains(action), \"%r (%s) invalid\"%(action, type(action))\n","        self.turn += 1\n","        reward = 0\n","        \n","        # Move ship\n","        direction = self.directions[action]\n","        moving = False\n","        if direction != Direction.Still:\n","            move_cost = (int)(self.map[self.ship.position.y][self.ship.position.x].halite_amount / 10)\n","            if self.ship.halite_amount >= move_cost:\n","                self.ship.halite_amount -= move_cost\n","                moving = True\n","                target = self.ship.position.directional_offset(direction)\n","                target.normalize(32)\n","                self.ship.position = target\n","                print('Moved ' + str(direction) + ' to ' + str(target))\n","        \n","        # Deposit halite\n","        if self.ship.position == self.shipyard.position:\n","            if self.reward_type == 'immediate':\n","                reward = self.ship.halite_amount\n","            self.bank += self.ship.halite_amount\n","            self.ship.halite_amount = 0\n","           \n","        # Mine halite\n","        if direction == Direction.Still or not moving:\n","            mined_halite = (int)(self.map[self.ship.position.y][self.ship.position.x].halite_amount / 4)\n","            self.ship.halite_amount += mined_halite\n","            print('Mined at ' + str(self.ship.position))\n","  
          self.map[self.ship.position.y][self.ship.position.x].halite_amount -= mined_halite\n","        \n","        print('Cargo: ' + str(self.ship.halite_amount))\n","        \n","        done = bool(self.turn > MAX_TURNS)\n","        if done:\n","            if self.steps_beyond_done == 0 and self.reward_type == 'delayed':\n","                reward = self.bank\n","            if self.steps_beyond_done > 0:\n","                logger.warn(\"You are calling 'step()' even though this environment has already returned done = True -- any further steps are undefined behavior.\")\n","            self.steps_beyond_done += 1\n","\n","        return self.get_observation(), reward, done, {}\n","\n","    def reset(self):\n","        import copy  # function-scope import; copy is not imported at the top of this cell\n","\n","        self.steps_beyond_done = 0\n","        \n","        self.turn = 0\n","        # step() mines halite out of self.map in place, so play on a private copy;\n","        # otherwise every episode would permanently drain the shared self.maps.\n","        self.map = copy.deepcopy(random.choice(self.maps))\n","        self.shipyard = Shipyard(0, -1, Position(12, 24))\n","        self.ship = Ship(0, 0, Position(12, 24), 0)\n","        self.bank = 0\n","        \n","        return self.get_observation()\n","    \n","    '''\n","    Converts the map state into an observation\n","    The observation is a 3x33x33 array of the cells surrounding the ship, centered on the ship\n","    Channel 1: map halite amounts (scaled from 0-1000 to 0-250)\n","    Channel 2: ship cargo\n","    Channel 3: shipyard/dropoff locations\n","    '''\n","    def get_observation(self):\n","        \n","        halite_layer = np.zeros((33, 33), dtype=int)\n","        for dx in range(-16, 17):\n","            for dy in range(-16, 17):\n","                cell = Position(dx, dy) + self.ship.position\n","                cell.normalize(32)\n","                halite_layer[16+dy][16+dx] = (int)(self.map[cell.y][cell.x].halite_amount / 4)\n","        \n","        ship_layer = np.zeros((33, 33), dtype=int)\n","        ship_layer[16][16] = (int)(self.ship.halite_amount / 4)\n","        \n","        dropoff_layer = np.zeros((33, 33), dtype=int)\n","        sy_idx = 
self.shipyard.position - self.ship.position + Position(16, 16)\n","        sy_idx.normalize(33)\n","        dropoff_layer[sy_idx.y][sy_idx.x] = 1\n","        return np.array((halite_layer, ship_layer, dropoff_layer))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Yu0Gq72neL9G","colab_type":"code","colab":{}},"cell_type":"code","source":["#!/usr/bin/env python\n","import abc\n","import json\n","import logging\n","import queue\n","\n","\n","class Entity(abc.ABC):\n","    \"\"\"\n","    Base Entity Class from whence Ships, Dropoffs and Shipyards inherit\n","    \"\"\"\n","    def __init__(self, owner, id, position):\n","        self.owner = owner\n","        self.id = id\n","        self.position = position\n","\n","    @staticmethod\n","    def _generate(player_id):\n","        \"\"\"\n","        Method which creates an entity for a specific player given input from the engine.\n","        :param player_id: The player id for the player who owns this entity\n","        :return: An instance of Entity along with its id\n","        \"\"\"\n","        ship_id, x_position, y_position = map(int, input().split())\n","        return ship_id, Entity(player_id, ship_id, Position(x_position, y_position))\n","\n","    def __repr__(self):\n","        return \"{}(id={}, {})\".format(self.__class__.__name__,\n","                                      self.id,\n","                                      self.position)\n","\n","\n","class Dropoff(Entity):\n","    \"\"\"\n","    Dropoff class for housing dropoffs\n","    \"\"\"\n","    pass\n","\n","\n","class Shipyard(Entity):\n","    \"\"\"\n","    Shipyard class to house shipyards\n","    \"\"\"\n","    def spawn(self):\n","        \"\"\"Return a move to spawn a new ship.\"\"\"\n","        return commands.GENERATE\n","\n","\n","class Ship(Entity):\n","    \"\"\"\n","    Ship class to house ship entities\n","    \"\"\"\n","    def __init__(self, owner, id, position, halite_amount):\n","        super().__init__(owner, id, 
class Game:
    """
    Top-level handle on a running match: the turn counter, the players and the map.
    """
    def __init__(self):
        """
        Reads the pre-game handshake from standard input and builds the start state.

        Input is consumed in this order: one line of constants (JSON), one
        line with the player count and this bot's id, then whatever
        ``Player._generate`` reads for each player and ``GameMap._generate``
        reads for the map. Logging is configured to write to ``bot-<id>.log``.
        """
        self.turn_number = 0

        # First line from the engine: the constants, as JSON
        constants.load_constants(json.loads(input()))

        header = input().split()
        num_players, self.my_id = (int(field) for field in header)

        log_file = "bot-{}.log".format(self.my_id)
        logging.basicConfig(level=logging.DEBUG, filemode="w", filename=log_file)

        # Keys are the loop indices, in the order the players are read
        self.players = {index: Player._generate() for index in range(num_players)}
        self.me = self.players[self.my_id]
        self.game_map = GameMap._generate()

    def ready(self, name):
        """
        Tells the engine that the bot has finished its setup.
        :param name: The name of your bot
        """
        networking.send_commands([name])

    def update_frame(self):
        """
        Reads one turn of engine input and refreshes players and map from it.
        :returns: nothing.
        """
        self.turn_number = int(input())
        logging.info("=============== TURN {:03} ================".format(self.turn_number))

        player_count = len(self.players)
        for _ in range(player_count):
            fields = [int(token) for token in input().split()]
            player_id, ship_count, dropoff_count, halite = fields
            self.players[player_id]._update(ship_count, dropoff_count, halite)

        self.game_map._update()

        # Re-flag every occupied cell and re-attach every structure
        board = self.game_map
        for owner in self.players.values():
            for vessel in owner.get_ships():
                board[vessel.position].mark_unsafe(vessel)

            yard = owner.shipyard
            board[yard.position].structure = yard
            for site in owner.get_dropoffs():
                board[site.position].structure = site

    @staticmethod
    def end_turn(commands):
        """
        Sends this turn's commands to the engine, which ends the turn.
        :param commands: Array of commands to send to engine
        :return: nothing.
        """
        networking.send_commands(commands)
class MapCell:
    """
    A cell on the game map.

    Stores the cell's position, its halite amount, the ship standing on it
    (if any) and the structure built on it (if any). Two cells compare equal
    when their positions are equal; halite, ship and structure are ignored.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable.
    """
    def __init__(self, position, halite):
        """
        :param position: Where the cell is on the map
        :param halite: The amount of halite on the cell
        """
        self.position = position
        self.halite_amount = halite
        self.ship = None
        self.structure = None

    @property
    def is_empty(self):
        """
        :return: Whether this cell has no ships or structures
        """
        return self.ship is None and self.structure is None

    @property
    def is_occupied(self):
        """
        :return: Whether this cell has any ships
        """
        return self.ship is not None

    @property
    def has_structure(self):
        """
        :return: Whether this cell has any structures
        """
        return self.structure is not None

    @property
    def structure_type(self):
        """
        :return: The type of the structure in this cell, or None without one
        """
        return None if not self.structure else type(self.structure)

    def mark_unsafe(self, ship):
        """
        Mark this cell as unsafe (occupied) for navigation.
        Use in conjunction with GameMap.get_safe_move.
        :param ship: The ship that occupies this cell
        """
        self.ship = ship

    def __eq__(self, other):
        """
        :return: Whether ``other`` sits at the same position. Objects without
                 a ``position`` (None, strings, ...) yield NotImplemented, so
                 the comparison comes out False instead of raising.
        """
        try:
            other_position = other.position
        except AttributeError:
            return NotImplemented
        return self.position == other_position

    def __ne__(self, other):
        """
        :return: The negation of ``__eq__``, passing NotImplemented through
        """
        outcome = self.__eq__(other)
        return outcome if outcome is NotImplemented else not outcome

    def __str__(self):
        return 'MapCell({}, halite={})'.format(self.position, self.halite_amount)
def get_unsafe_moves(self, source, destination):
    """
    Return the Direction(s) to move closer to the target point, or empty if the points are the same.
    This move mechanic does not account for collisions. Up to two directions
    are returned (x axis first, then y axis) when both bring the source closer.
    For each axis the direct direction is used when the distance is less than
    half the map size; otherwise the opposite direction is used, because going
    around the wrap-around edge is not longer.
    :param source: The starting position
    :param destination: The destination towards which you wish to move your object.
    :return: A list of valid (closest) Directions towards your target.
    """
    # Wrap both endpoints onto the map first. Coordinates outside the map
    # bounds would otherwise inflate the distance and flip the
    # "shorter way round" decision below.
    source = self.normalize(source)
    destination = self.normalize(destination)

    possible_moves = []
    distance = abs(destination - source)
    y_cardinality, x_cardinality = self._get_target_direction(source, destination)

    if distance.x != 0:
        possible_moves.append(x_cardinality if distance.x < (self.width / 2)
                              else Direction.invert(x_cardinality))
    if distance.y != 0:
        possible_moves.append(y_cardinality if distance.y < (self.height / 2)
                              else Direction.invert(y_cardinality))
    return possible_moves
None otherwise\n","        \"\"\"\n","        visited_map = [[None for _ in range(self.width)] for _ in range(self.height)]\n","        potentials_queue = queue.Queue()\n","        potentials_queue.put(source)\n","        steps_taken = 0\n","        while not potentials_queue.empty():\n","            current_square = potentials_queue.get()\n","            if current_square == destination:\n","                return visited_map\n","            for suitor in current_square.position.get_surrounding_cardinals():\n","                suitor = self.normalize(suitor)\n","                if not self[suitor].is_occupied and not visited_map[suitor.y][suitor.x]:\n","                    potentials_queue.put(self[suitor])\n","                    visited_map[suitor.y][suitor.x] = current_square\n","\n","            steps_taken += 1\n","\n","            if steps_taken >= constants.MAX_BFS_STEPS:\n","                break\n","\n","        return None\n","\n","    @staticmethod\n","    def _find_first_move(source, destination, visited):\n","        \"\"\"\n","        Given a visited map, find the viable first move near the source and return it\n","        :param source: The first position\n","        :param destination: The target\n","        :param visited: A map containing the visited cell information from _bfs_traverse_safely\n","        :return: The first viable move\n","        \"\"\"\n","        current_square = destination\n","        previous = None\n","        while current_square is not None and current_square != source:\n","            previous = current_square\n","            current_square = visited[current_square.position.y][current_square.position.x]\n","        return previous\n","\n","    def _naive_navigate(self, source, destination):\n","        \"\"\"\n","        Returns a singular safe move towards the destination.\n","        :param source: Starting position\n","        :param destination: Ending position\n","        :return: A direction, or None if no such 
def get_safe_move(self, source, destination):
    """
    Picks one collision-aware step from ``source`` towards ``destination``.
    A BFS route is tried first; when it yields nothing, the choice falls
    back to ``_naive_navigate``.
    :param source: The source MapCell that you wish to move
    :param destination: The destination MapCell towards which you
    wish to move your object.
    :return: A single direction towards the destination, or None if the
    two cells are equal or no such move exists.
    :raises AttributeError: if either argument is not a MapCell
    """
    both_cells = isinstance(source, MapCell) and isinstance(destination, MapCell)
    if not both_cells:
        raise AttributeError("Source and Destination must be of type MapCell")

    if source == destination:
        return None

    trail = self._bfs_traverse_safely(source, destination)
    if not trail:
        # The search found no route: use the direct, greedy choice instead
        return self._naive_navigate(source.position, destination.position)

    next_cell = self._find_first_move(source, destination, trail)
    if not next_cell:
        return None

    candidates = self.get_unsafe_moves(source.position, next_cell.position)
    return candidates[0] if candidates else None
def parse_initial_map(file_name):
    """
    Parses the initial map data from this replay file.

    The file is read as bytes, decompressed with ``zstd.loads`` and decoded
    as JSON; the cells come from ``production_map.grid``.
    :param file_name: Path of the replay file
    :return: A list of rows, each a list of MapCell built from the cell's
    column/row position and its ``energy`` value
    """
    with open(file_name, 'rb') as replay_file:
        replay = json.loads(zstd.loads(replay_file.read()))

    grid = replay['production_map']['grid']
    return [
        [MapCell(Position(col, row), entry['energy']) for col, entry in enumerate(cells)]
        for row, cells in enumerate(grid)
    ]


def load_maps(folder_name, max_files=None):
    """
    Loads initial map data from the replay files in folder_name.

    Directory entries are visited in sorted order and only names ending in
    ``.hlt`` are parsed.
    :param folder_name: Directory that holds the replay files
    :param max_files: Stop once this many maps are loaded; None loads all
    :return: A list of maps as returned by parse_initial_map
    """
    loaded = []
    for entry in tqdm(sorted(os.listdir(folder_name))):
        if entry.endswith(".hlt"):
            if max_files is not None and len(loaded) >= max_files:
                break
            loaded.append(parse_initial_map(os.path.join(folder_name, entry)))
    return loaded
ret = ret.dtype.type(ret / rcount)\n"],"name":"stderr"},{"output_type":"stream","text":["--------------------------------------\n","| % time spent exploring  | 98       |\n","| episodes                | 10       |\n","| mean 100 episode reward | 54.9     |\n","| steps                   | 1358     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 97       |\n","| episodes                | 20       |\n","| mean 100 episode reward | 50.1     |\n","| steps                   | 2868     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 95       |\n","| episodes                | 30       |\n","| mean 100 episode reward | 53.1     |\n","| steps                   | 4378     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 94       |\n","| episodes                | 40       |\n","| mean 100 episode reward | 44.5     |\n","| steps                   | 5888     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 92       |\n","| episodes                | 50       |\n","| mean 100 episode reward | 42.7     |\n","| steps                   | 7398     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 91       |\n","| episodes                | 60       |\n","| mean 100 episode reward | 40.6     |\n","| steps                   | 8908     |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 89       |\n","| episodes                | 70       |\n","| mean 100 episode reward | 38.2     |\n","| steps                   | 10418    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 88       |\n","| episodes         
       | 80       |\n","| mean 100 episode reward | 36.5     |\n","| steps                   | 11928    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 86       |\n","| episodes                | 90       |\n","| mean 100 episode reward | 33.7     |\n","| steps                   | 13438    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 85       |\n","| episodes                | 100      |\n","| mean 100 episode reward | 31.2     |\n","| steps                   | 14948    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 83       |\n","| episodes                | 110      |\n","| mean 100 episode reward | 28.8     |\n","| steps                   | 16458    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 82       |\n","| episodes                | 120      |\n","| mean 100 episode reward | 26.2     |\n","| steps                   | 17968    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 80       |\n","| episodes                | 130      |\n","| mean 100 episode reward | 21       |\n","| steps                   | 19478    |\n","--------------------------------------\n","Saving model due to mean reward increase: None -> 21.4\n","--------------------------------------\n","| % time spent exploring  | 79       |\n","| episodes                | 140      |\n","| mean 100 episode reward | 21.7     |\n","| steps                   | 20988    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 77       |\n","| episodes                | 150      |\n","| mean 100 episode reward | 19.4     |\n","| steps                   | 22498    
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 76       |\n","| episodes                | 160      |\n","| mean 100 episode reward | 16.9     |\n","| steps                   | 24008    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 74       |\n","| episodes                | 170      |\n","| mean 100 episode reward | 15.2     |\n","| steps                   | 25518    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 73       |\n","| episodes                | 180      |\n","| mean 100 episode reward | 12.7     |\n","| steps                   | 27028    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 71       |\n","| episodes                | 190      |\n","| mean 100 episode reward | 12.2     |\n","| steps                   | 28538    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 70       |\n","| episodes                | 200      |\n","| mean 100 episode reward | 13.9     |\n","| steps                   | 30048    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 68       |\n","| episodes                | 210      |\n","| mean 100 episode reward | 13.6     |\n","| steps                   | 31558    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 67       |\n","| episodes                | 220      |\n","| mean 100 episode reward | 12.5     |\n","| steps                   | 33068    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 65       |\n","| episodes                | 230      |\n","| mean 100 
episode reward | 11.9     |\n","| steps                   | 34578    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 64       |\n","| episodes                | 240      |\n","| mean 100 episode reward | 11.2     |\n","| steps                   | 36088    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 62       |\n","| episodes                | 250      |\n","| mean 100 episode reward | 10.1     |\n","| steps                   | 37598    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 61       |\n","| episodes                | 260      |\n","| mean 100 episode reward | 9.7      |\n","| steps                   | 39108    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 59       |\n","| episodes                | 270      |\n","| mean 100 episode reward | 9.2      |\n","| steps                   | 40618    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 58       |\n","| episodes                | 280      |\n","| mean 100 episode reward | 9.4      |\n","| steps                   | 42128    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 56       |\n","| episodes                | 290      |\n","| mean 100 episode reward | 8.9      |\n","| steps                   | 43638    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 55       |\n","| episodes                | 300      |\n","| mean 100 episode reward | 6.8      |\n","| steps                   | 45148    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 53   
    |\n","| episodes                | 310      |\n","| mean 100 episode reward | 4.5      |\n","| steps                   | 46658    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 52       |\n","| episodes                | 320      |\n","| mean 100 episode reward | 3.8      |\n","| steps                   | 48168    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 50       |\n","| episodes                | 330      |\n","| mean 100 episode reward | 3.8      |\n","| steps                   | 49678    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 49       |\n","| episodes                | 340      |\n","| mean 100 episode reward | 2.6      |\n","| steps                   | 51188    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 47       |\n","| episodes                | 350      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 52698    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 46       |\n","| episodes                | 360      |\n","| mean 100 episode reward | 2.3      |\n","| steps                   | 54208    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 44       |\n","| episodes                | 370      |\n","| mean 100 episode reward | 2.2      |\n","| steps                   | 55718    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 43       |\n","| episodes                | 380      |\n","| mean 100 episode reward | 2.8      |\n","| steps                   | 57228    
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 41       |\n","| episodes                | 390      |\n","| mean 100 episode reward | 2.9      |\n","| steps                   | 58738    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 40       |\n","| episodes                | 400      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 60248    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 38       |\n","| episodes                | 410      |\n","| mean 100 episode reward | 2.2      |\n","| steps                   | 61758    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 37       |\n","| episodes                | 420      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 63268    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 35       |\n","| episodes                | 430      |\n","| mean 100 episode reward | 2.6      |\n","| steps                   | 64778    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 34       |\n","| episodes                | 440      |\n","| mean 100 episode reward | 2.3      |\n","| steps                   | 66288    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 32       |\n","| episodes                | 450      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 67798    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 31       |\n","| episodes                | 460      |\n","| mean 100 
episode reward | 2.4      |\n","| steps                   | 69308    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 29       |\n","| episodes                | 470      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 70818    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 28       |\n","| episodes                | 480      |\n","| mean 100 episode reward | 1.8      |\n","| steps                   | 72328    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 26       |\n","| episodes                | 490      |\n","| mean 100 episode reward | 1.6      |\n","| steps                   | 73838    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 25       |\n","| episodes                | 500      |\n","| mean 100 episode reward | 2.2      |\n","| steps                   | 75348    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 23       |\n","| episodes                | 510      |\n","| mean 100 episode reward | 2.3      |\n","| steps                   | 76858    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 22       |\n","| episodes                | 520      |\n","| mean 100 episode reward | 3.9      |\n","| steps                   | 78368    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 20       |\n","| episodes                | 530      |\n","| mean 100 episode reward | 3.7      |\n","| steps                   | 79878    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 19   
    |\n","| episodes                | 540      |\n","| mean 100 episode reward | 3.2      |\n","| steps                   | 81388    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 17       |\n","| episodes                | 550      |\n","| mean 100 episode reward | 3.3      |\n","| steps                   | 82898    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 16       |\n","| episodes                | 560      |\n","| mean 100 episode reward | 3.7      |\n","| steps                   | 84408    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 14       |\n","| episodes                | 570      |\n","| mean 100 episode reward | 3.9      |\n","| steps                   | 85918    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 13       |\n","| episodes                | 580      |\n","| mean 100 episode reward | 5.9      |\n","| steps                   | 87428    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 11       |\n","| episodes                | 590      |\n","| mean 100 episode reward | 6.2      |\n","| steps                   | 88938    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 10       |\n","| episodes                | 600      |\n","| mean 100 episode reward | 6.3      |\n","| steps                   | 90448    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 8        |\n","| episodes                | 610      |\n","| mean 100 episode reward | 6.9      |\n","| steps                   | 91958    
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 7        |\n","| episodes                | 620      |\n","| mean 100 episode reward | 5.7      |\n","| steps                   | 93468    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 5        |\n","| episodes                | 630      |\n","| mean 100 episode reward | 5.7      |\n","| steps                   | 94978    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 4        |\n","| episodes                | 640      |\n","| mean 100 episode reward | 12.8     |\n","| steps                   | 96488    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 2        |\n","| episodes                | 650      |\n","| mean 100 episode reward | 13       |\n","| steps                   | 97998    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 660      |\n","| mean 100 episode reward | 15       |\n","| steps                   | 99508    |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 670      |\n","| mean 100 episode reward | 15.3     |\n","| steps                   | 101018   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 680      |\n","| mean 100 episode reward | 13.2     |\n","| steps                   | 102528   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 690      |\n","| mean 100 
episode reward | 13.7     |\n","| steps                   | 104038   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 700      |\n","| mean 100 episode reward | 13.8     |\n","| steps                   | 105548   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 710      |\n","| mean 100 episode reward | 14.1     |\n","| steps                   | 107058   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 720      |\n","| mean 100 episode reward | 13.7     |\n","| steps                   | 108568   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 730      |\n","| mean 100 episode reward | 14.1     |\n","| steps                   | 110078   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 740      |\n","| mean 100 episode reward | 7.8      |\n","| steps                   | 111588   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 750      |\n","| mean 100 episode reward | 8.6      |\n","| steps                   | 113098   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 760      |\n","| mean 100 episode reward | 6.4      |\n","| steps                   | 114608   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 770      |\n","| mean 100 episode reward | 6.3      |\n","| steps                   | 116118   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 780      |\n","| mean 100 episode reward | 6.2      |\n","| steps                   | 117628   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 790      |\n","| mean 100 episode reward | 6        |\n","| steps                   | 119138   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 800      |\n","| mean 100 episode reward | 5.8      |\n","| steps                   | 120648   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 810      |\n","| mean 100 episode reward | 5.2      |\n","| steps                   | 122158   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 820      |\n","| mean 100 episode reward | 5.2      |\n","| steps                   | 123668   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 830      |\n","| mean 100 episode reward | 5.1      |\n","| steps                   | 125178   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 840      |\n","| mean 100 episode reward | 4.4      |\n","| steps                   | 126688   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 850      |\n","| mean 100 episode reward | 3.4      |\n","| steps                   | 128198   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 860      |\n","| mean 100 episode reward | 3.4      |\n","| steps                   | 129708   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 870      |\n","| mean 100 episode reward | 3.3      |\n","| steps                   | 131218   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 880      |\n","| mean 100 episode reward | 3.3      |\n","| steps                   | 132728   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 890      |\n","| mean 100 episode reward | 2.9      |\n","| steps                   | 134238   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 900      |\n","| mean 100 episode reward | 2.4      |\n","| steps                   | 135748   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 910      |\n","| mean 100 episode reward | 2.2      |\n","| steps                   | 137258   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 920      |\n","| mean 100 
episode reward | 2        |\n","| steps                   | 138768   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 930      |\n","| mean 100 episode reward | 1.8      |\n","| steps                   | 140278   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 940      |\n","| mean 100 episode reward | 1.9      |\n","| steps                   | 141788   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 950      |\n","| mean 100 episode reward | 2        |\n","| steps                   | 143298   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 960      |\n","| mean 100 episode reward | 1.7      |\n","| steps                   | 144808   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 970      |\n","| mean 100 episode reward | 1.5      |\n","| steps                   | 146318   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 980      |\n","| mean 100 episode reward | 1.5      |\n","| steps                   | 147828   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 990      |\n","| mean 100 episode reward | 1.4      |\n","| steps                   | 149338   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 1000     |\n","| mean 100 episode reward | 1.2      |\n","| steps                   | 150848   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1010     |\n","| mean 100 episode reward | 1.2      |\n","| steps                   | 152358   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1020     |\n","| mean 100 episode reward | 1.2      |\n","| steps                   | 153868   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1030     |\n","| mean 100 episode reward | 1        |\n","| steps                   | 155378   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1040     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 156888   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1050     |\n","| mean 100 episode reward | 0.8      |\n","| steps                   | 158398   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1060     |\n","| mean 100 episode reward | 0.8      |\n","| steps                   | 159908   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1070     |\n","| mean 100 episode reward | 0.8      |\n","| steps                   | 161418   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1080     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 162928   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1090     |\n","| mean 100 episode reward | 1        |\n","| steps                   | 164438   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1100     |\n","| mean 100 episode reward | 1.1      |\n","| steps                   | 165948   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1110     |\n","| mean 100 episode reward | 1        |\n","| steps                   | 167458   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1120     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 168968   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1130     |\n","| mean 100 episode reward | 1        |\n","| steps                   | 170478   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1140     |\n","| mean 100 episode reward | 1        |\n","| steps                   | 171988   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1150     |\n","| mean 100 
episode reward | 1        |\n","| steps                   | 173498   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1160     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 175008   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1170     |\n","| mean 100 episode reward | 0.8      |\n","| steps                   | 176518   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1180     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 178028   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1190     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 179538   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1200     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 181048   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1210     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 182558   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1220     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 184068   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 1230     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 185578   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1240     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 187088   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1250     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 188598   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1260     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 190108   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1270     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 191618   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1280     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 193128   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1290     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 194638   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1300     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 196148   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1310     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 197658   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1320     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 199168   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1330     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 200678   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1340     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 202188   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1350     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 203698   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1360     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 205208   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1370     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 206718   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1380     |\n","| mean 100 
episode reward | 0.6      |\n","| steps                   | 208228   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1390     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 209738   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1400     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 211248   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1410     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 212758   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1420     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 214268   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1430     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 215778   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1440     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 217288   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1450     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 218798   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 1460     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 220308   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1470     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 221818   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1480     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 223328   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1490     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 224838   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1500     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 226348   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1510     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 227858   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1520     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 229368   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1530     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 230878   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1540     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 232388   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1550     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 233898   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1560     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 235408   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1570     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 236918   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1580     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 238428   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1590     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 239938   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1600     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 241448   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1610     |\n","| mean 100 
episode reward | 0.7      |\n","| steps                   | 242958   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1620     |\n","| mean 100 episode reward | 0.8      |\n","| steps                   | 244468   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1630     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 245978   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1640     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 247488   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1650     |\n","| mean 100 episode reward | 0.9      |\n","| steps                   | 248998   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1660     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 250508   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1670     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 252018   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1680     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 253528   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 1690     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 255038   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1700     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 256548   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1710     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 258058   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1720     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 259568   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1730     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 261078   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1740     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 262588   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1750     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 264098   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1760     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 265608   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1770     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 267118   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1780     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 268628   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1790     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 270138   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1800     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 271648   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1810     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 273158   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1820     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 274668   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1830     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 276178   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1840     |\n","| mean 100 
episode reward | 0.6      |\n","| steps                   | 277688   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1850     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 279198   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1860     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 280708   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1870     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 282218   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1880     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 283728   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1890     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 285238   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1900     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 286748   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1910     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 288258   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1    
    |\n","| episodes                | 1920     |\n","| mean 100 episode reward | 0.7      |\n","| steps                   | 289768   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1930     |\n","| mean 100 episode reward | 0.6      |\n","| steps                   | 291278   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1940     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 292788   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1950     |\n","| mean 100 episode reward | 0.5      |\n","| steps                   | 294298   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1960     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 295808   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1970     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 297318   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1980     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 298828   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 1990     |\n","| mean 100 episode reward | 0.4      |\n","| steps                   | 300338   
|\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 2000     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 301848   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 2010     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 303358   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 2020     |\n","| mean 100 episode reward | 0.3      |\n","| steps                   | 304868   |\n","--------------------------------------\n","--------------------------------------\n","| % time spent exploring  | 1        |\n","| episodes                | 2030     |\n","| mean 100 episode reward | 0.2      |\n","| steps                   | 306378   |\n","--------------------------------------\n"],"name":"stdout"},{"output_type":"error","ename":"KeyboardInterrupt","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)","\u001b[0;32m<ipython-input-13-bb82c157db62>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     31\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'__main__'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 33\u001b[0;31m     \u001b[0mmain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m<ipython-input-13-bb82c157db62>\u001b[0m in 
\u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m     23\u001b[0m         \u001b[0mlearning_starts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     24\u001b[0m         \u001b[0mtarget_network_update_freq\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m         \u001b[0mgamma\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     26\u001b[0m     )\n\u001b[1;32m     27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/baselines/deepq/simple.py\u001b[0m in \u001b[0;36mlearn\u001b[0;34m(env, q_func, lr, max_timesteps, buffer_size, exploration_fraction, exploration_final_eps, train_freq, batch_size, print_freq, checkpoint_freq, learning_starts, gamma, target_network_update_freq, prioritized_replay, prioritized_replay_alpha, prioritized_replay_beta0, prioritized_replay_beta_iters, prioritized_replay_eps, param_noise, callback)\u001b[0m\n\u001b[1;32m    242\u001b[0m             \u001b[0menv_action\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    243\u001b[0m             \u001b[0mreset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 244\u001b[0;31m             \u001b[0mnew_obs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrew\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menv_action\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    245\u001b[0m       
      \u001b[0;31m# Store transition in the replay buffer.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    246\u001b[0m             \u001b[0mreplay_buffer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrew\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnew_obs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m<ipython-input-10-fcbee9100193>\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, action)\u001b[0m\n\u001b[1;32m     81\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps_beyond_done\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     82\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 83\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_observation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     84\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     85\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m<ipython-input-10-fcbee9100193>\u001b[0m in \u001b[0;36mget_observation\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    107\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mdy\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m17\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    108\u001b[0m                 \u001b[0mcell\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPosition\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdy\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mship\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mposition\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 109\u001b[0;31m                 \u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m32\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    110\u001b[0m                 \u001b[0mhalite_layer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdy\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mdx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhalite_amount\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0;36m4\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m<ipython-input-7-5a875808b324>\u001b[0m in \u001b[0;36mnormalize\u001b[0;34m(self, size)\u001b[0m\n\u001b[1;32m     66\u001b[0m     \u001b[0;32mdef\u001b[0m 
\u001b[0mnormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     67\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mx\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 68\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0my\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0msize\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     70\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mdirectional_offset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdirection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}]},{"metadata":{"id":"A5vkXZHnf4N2","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]}