acforvs
diff --git a/‎.gitignore‎
Lines changed: 130 additions & 0 deletions b/‎.gitignore‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 145 additions & 0 deletions b/‎README.md‎
Lines changed: 145 additions & 0 deletions
diff --git a/‎config.yaml‎
Lines changed: 55 additions & 0 deletions b/‎config.yaml‎
Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,130 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+.idea/
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 Vlad
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,145 @@
+# Multi-agent Path Finding using Reinforcement Learning
+
+
+![PyTorch](https://img.shields.io/badge/PyTorch-%23EE4C2C.svg?style=flat&logo=PyTorch&logoColor=white)
+![Poetry](https://img.shields.io/badge/Poetry-%2300C4CC.svg?style=flat&logo=Poetry&logoColor=white)
+![Black](https://img.shields.io/badge/code%20style-black-000000.svg)
+
+## Description
+
+**Multi-agent pathfinding in partially observable environments. Search-based vs. RL-based algorithms.**
+
+The main goal of this repository is to provide a DHC [1] model implementation along with some benchmarks and charts.
+We also aim to compare the performance of the DHC model with the basic M* algorithm.
+
+## Requirements
+To run `models.dhc.train` successfully, you need a machine equipped with 1 GPU and several CPUs.
+Consider configuring `num_cpus - 2` actors via `dhc.train.num_actors` in `config.yaml`.
+
+**Attention: We do not guarantee the desired performance on a non-GPU machine.**
+
+While we aim to support macOS, Linux and Windows, successful training is not guaranteed on a Windows-based machine.
+The benchmarking script should work there, though. Please report it [here](https://github.com/acforvs/po-mapf-thesis/issues) if it doesn't.
+
+## Setting up
+1. Install [Poetry](https://python-poetry.org)
+2. Run [poetry install](https://python-poetry.org/docs/cli/#install) to install the dependencies
+
+If you see the error ``Failed to create the collection: Prompt dismissed..`` when trying to run `poetry install`, [consider](https://github.com/python-poetry/poetry/issues/1917#issuecomment-1251667047) executing this line first:
+```shell
+export PYTHON_KEYRING_BACKEND=keyring.backends.null.Keyring
+```
+
+## Repository description & Usage 
+1. `models` dir contains the weights of the trained models
+2. `config.yaml` - training & model params, environmental settings etc.
+3. `pathfinding/models` contains the implementations of the different models
+
+## Models
+### DHC
+
+**D**istributed, **H**euristic and **C**ommunication [1]
+
+> To guide RL algorithm on long-horizon goal-oriented tasks, we embed the potential choices of shortest paths from single source as heuristic guidance instead of using a specific path as in most existing works. Our method treats each agent independently and trains the model from a single agent’s perspective. The final trained policy is applied to each agent for decentralized execution. The whole system is distributed during training and is trained under a curriculum learning strategy.
+
+![visAfter](./static/DHC_architecture.png)  
+![visAfter](./static/DHC_training.png)
+
+<details>
+    <summary>DHC</summary>
+
+#### Benchmarking 
+
+**To run the generated test suite, run**
+```shell
+poetry run python3 pathfinding/models/dhc/evaluate.py test_model TESTS_DESCR MODEL_ID
+```
+where
+* TESTS_DESCR is a string of the format `'[(map_length, num_agents, density), ...]'` (you may want to copy this line from the generation command)
+* MODEL_ID is the name of the weights file from the `models` dir, without the `.pth` extension
+
+For example, by running
+
+```shell
+poetry run python3 pathfinding/models/dhc/evaluate.py test_model '[(40, 16, 0.3), (80, 4, 0.1)]' 60000
+```
+you will benchmark `models/60000.pth` on the provided test cases.
+
+**Attention: the test cases must be generated first!** 
+
+#### Training
+1. Set the desired number of actors via `dhc.train.num_actors` in `config.yaml`
+
+It is recommended to use the number of CPU cores on your machine minus 2
+
+2. To initialize training, run
+```shell
+poetry run python3 pathfinding/models/dhc/train.py
+```
+
+The `models` dir will be created afterwards; the weights of the intermediate models are saved there.
+
+#### Visualizing
+
+1. To visualize the results, run
+```shell
+poetry run python3 pathfinding/models/dhc/visualize.py MODEL_ID TEST_NAME TEST_ID
+```
+where
+* MODEL_ID is the name of the weights file from the `models` dir, without the `.pth` extension
+* TEST_NAME is the name of the file with tests, for example `80length_32agents_0.3density.pkl`
+* TEST_ID (optional) is the id of a test from the provided test suite
+
+</details>
+
+## The setup
+The DHC network was trained on a single [NVIDIA TESLA T4 GPU](https://www.nvidia.com/en-us/data-center/tesla-t4/) for 7 hours.
+
+We used 20 CPU cores: 18 for the actors, and the remaining 2 shared by the Learner and the GlobalBuffer.
+
+
+## DHC Results
+
+**Our trained model outperforms PRIMAL [2] benchmarks**
+
+![visAfter](./static/chart_40x40.png)
+![visAfter](./static/chart_80x80.png)
+
+![visAfter](./static/DHC_10x10_4_good.gif)
+![visAfter](./static/DHC_40x40_4_good.gif)
+![visAfter](./static/DHC_40x40_16_good.gif)
+![visAfter](./static/DHC_40x40_16_dense.gif)
+
+
+## Contributing
+<details>
+    <summary>See the detailed contribution guide</summary>
+
+1. Install [black](https://github.com/psf/black); you can likely do so by running
+```shell
+pip3 install black 
+```
+
+2. Use [black](https://github.com/psf/black) to ensure that the code style remains consistent
+```shell
+poetry run black dir
+```
+3. Make sure tests are OK
+```shell
+poetry run pytest
+```
+4. Create a PR with new features
+</details>
+
+## References
+
+<a id="1">[1]</a> 
+Ma, Ziyuan and Luo, Yudong and Ma, Hang, 2021. Distributed Heuristic Multi-Agent Path Finding with Communication.
+
+<a id="2">[2]</a> 
+Sartoretti, G., Kerr, J., Shi, Y., Wagner, G., Kumar, T.S., Koenig, S. and Choset, H., 2019. Primal: Pathfinding via reinforcement and imitation multi-agent learning. IEEE Robotics and Automation Letters, 4(3), pp.2378-2385.
+
+## License
+
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/acforvs/po-mapf-thesis/blob/main/LICENSE)
+
+
@@ -0,0 +1,55 @@
+dhc:
+  cnn_channels: 128
+  fov: !!python/tuple [9, 9]
+  observation_radius: 4  # FOV side = 2 * observation_radius + 1 = 9, matching the 9x9 fov above
+  observation_shape: !!python/tuple [6, 9, 9]
+  hidden_dim: 256
+  max_comm_agents: 3 # includes the agent itself
+  batch_size: 192
+  max_num_agents: 2
+  latent_dim: 784  # 16 * 7 * 7, do not forget to change if the observation_shape is changed
+  max_episode_length: 256
+
+  communication:
+    num_comm_layers: 2
+    num_comm_heads: 2
+
+  buffer:
+    action_dim: 5
+    forward_steps: 2
+
+  worker:
+    episode_capacity: 2048
+    init_env_settings: !!python/tuple [ 1, 10 ]
+    max_comm_agents: 3
+    prioritized_replay_alpha: 0.6
+    prioritized_replay_beta: 0.4
+    forward_steps: 2
+    seq_len: 16
+    max_map_length: 40
+    pass_rate: 0.9
+    learning_starts: 50000
+    training_times: 600000
+    target_network_update_freq: 2000
+    save_interval: 2000
+    actor_update_steps: 400
+
+  train:
+    num_actors: 6
+    log_interval: 10
+
+
+environment:
+  map_length: 50
+  num_agents: 2
+  observation_radius: 4
+  reward_fn: 
+    move: -0.075
+    stay_on_goal: 0
+    stay_off_goal: -0.075
+    collision: -0.5
+    finish: 3
+
+  init_env_settings: !!python/tuple [1, 10]
+  observation_shape: !!python/tuple [6, 9, 9]
+  action_dim: 5