diff --git a/.fdignore b/.fdignore
new file mode 100644
index 0000000..d83afc4
--- /dev/null
+++ b/.fdignore
@@ -0,0 +1,4 @@
+.*
+cursors
+LICENSE
+docs
\ No newline at end of file
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..57f9f81
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,6 @@
+!.gitignore
+!*
+!*/*
+cache_db.json
+cache_tree.json
+vector_cache
diff --git a/docs/cache_title.json b/docs/cache_title.json
new file mode 100644
index 0000000..5781a61
--- /dev/null
+++ b/docs/cache_title.json
@@ -0,0 +1 @@
+{"_default": {"1": {"path": "/README.md", "hash": "c1452962d435f3130bd2b7095763ed0b", "title": "Minecraft AI Competition and House Building"}, "2": {"path": "/README.md:3-35", "hash": "528e7f92e05e70d15692eae4c10de9f0", "title": "Video PreTraining Model Setup and Usage"}, "3": {"path": "/README.md:37-51", "hash": "fa6a4c803afe5dbb40e55ae277b6ae55", "title": "Pre-Trained Minecraft RL Models"}, "4": {"path": "/README.md:52-65", "hash": "f2e7a65b626c9636dcb63c37db1c8270", "title": "Pre-Trained Minecraft Models and Fine-Tuning"}, "5": {"path": "/README.md:66-85", "hash": "8d9c337360c872d328fc7a096da1f31e", "title": "Pre-Trained Minecraft Models for Faster Tech Tree Progress"}, "6": {"path": "/README.md:86-99", "hash": "9ea990b45669dc0134ee7fb3d55d4401", "title": "IDM Demonstration with Contractor Recordings"}, "7": {"path": "/README.md:101-114", "hash": "0b94971dc211000fe15cd49a94e25fdb", "title": "Video Pre-Training Demonstration"}, "8": {"path": "/README.md:116-136", "hash": "8cbc4f80712b287fe5417cb9a08981f0", "title": "Major Limitations and Versions Used"}, "9": {"path": "/README.md:136-152", "hash": "342a35871686f5f4cd03adceb06ca8df", "title": "Minecraft AI Model Training Versions"}, "10": {"path": "/README.md:153-168", "hash": "b568e506d7f68d3ba62ae9d393caf048", "title": "Minecraft AI: Enhancing Performance through Early Data"}, "11": {"path": "/README.md:168-182", "hash": "b829009d51fba00bc60044912275db27", "title": "Simple Wooden House Building Guide"}, "12": {"path": "/README.md:182-194", "hash": "0bde4a099a070a7624ce959fe719f1c6", "title": "Timed Minecraft Demo: House Building Challenge"}, "13": {"path": "/README.md:196-215", "hash": "75ccb213456b9925987c3c970b7d48e5", "title": "Time-Based Task Completion"}, "14": {"path": "/README.md:216-237", "hash": "d1fd694759d9d175c7cfed75472bf64c", "title": "Random Starting Inventory Generator"}, "15": {"path": "/README.md:238-250", "hash": "6fef77d0b3373d7bca09c983d2875502", "title": "Random Biome-based Hotbar Items"}, "16": {"path": "/README.md:251-263", "hash": "153ed32f17b6e984224a14c5fa5c72aa", "title": "Randomized Inventory Population"}, "17": {"path": "/README.md:264-288", "hash": "6241c345c15d01c0a963f790a1369530", "title": "Sunflower Inventory Boost"}, "18": {"path": "/README.md:288-304", "hash": "49d1830f219d3fd17faa0bc6b8a6767b", "title": "Minecraft Diamonds and Version Updates"}, "19": {"path": "/README.md:304-311", "hash": "97f68c5645214247229a383b401c7571", "title": "Minecraft Testing Guidelines"}, "20": {"path": "/README.md:312-338", "hash": "469a714703b02558a1b4810b3df6866b", "title": "Demonstration Format and Index"}, "21": {"path": "/README.md:340-388", "hash": "8ea986d3ac3fe442825903b664ecb966", "title": "Action and File Structure for Data Loader"}, "22": {"path": "/README.md:389-418", "hash": "8244fac6f7c73eaec7f6f96ad458e1c0", "title": "JSON Game State Data for Minecraft"}, "23": {"path": "/README.md:420-439", "hash": "af8d3ba5fdd955953c02f8d041d6bf7e", "title": "Cave Hunt and Waterfall Snapshot"}, "24": {"path": "/README.md:440-451", "hash": "764b061bd3e3447803ef51f668ef0789", "title": "Village-Friendly Animal Pen Building Prompt"}, "25": {"path": "/README.md:451-467", "hash": "12f8e64faa253a7719a9b19611a39c4b", "title": "AI Minecraft Tasks: Construct, Protect, and Tour in 5 Minutes"}, "26": {"path": "/README.md:467-487", "hash": "56f06a976273abb9a1d445a97289bbb5", "title": "Eco-Friendly Minecraft Village Building"}, "27": {"path": "/README.md:488-497", "hash": "df2c6bd3444897bddc114fe563e7afa3", "title": "Acknowledging 
Authors & Crediting Anssi Kanervisto"}, "28": {"path": "/agent.py", "hash": "f0b7144b27c969548d0f7bfdd75edd77", "title": "Reinforcement Learning Minecraft Agent Setup"}, "29": {"path": "/agent.py:1-40", "hash": "477e62661cccf5f37ed9f6413a71066d", "title": "Minecraft Agent Configuration"}, "30": {"path": "/agent.py:41-77", "hash": "a7cec167c0e7ef0a93d113d96ff9d9ac", "title": "Game Agent Parameter Settings"}, "31": {"path": "/agent.py:78-101", "hash": "52c7b1e390a783caec5eba8e80337fc0", "title": "MineRL Environment Validation and Setup"}, "32": {"path": "/agent.py:102-128", "hash": "58f163063388dbd8a85b0f78601b5c65", "title": "MineRL Agent Image Resizing"}, "33": {"path": "/agent.py:129-154", "hash": "4bed89ca686c1285bdebfc22044f88f4", "title": "Agent Class and Methods"}, "34": {"path": "/agent.py:155-175", "hash": "8a7a942e8e7a92b25cc2509b6eb08b1c", "title": "Action Transformation and Conversion in MineRL"}, "35": {"path": "/agent.py:176-199", "hash": "49d600737c3659db4e07dbaf4d3bb1a3", "title": "Observation-Action Class"}, "36": {"path": "/agent.py:200-206", "hash": "5fd5d8dbc42eede2cbad3f9c62e4678b", "title": "Agent's Policy Action Selection"}, "37": {"path": "/behavioural_cloning.py", "hash": "6dce892f3e3e939450af46ee9643ae30", "title": "Agent-Based Actor-Critic Training in Behavioral Cloning"}, "38": {"path": "/behavioural_cloning.py:1-34", "hash": "90844b50397edb735a07c4e2f5cea0b5", "title": "Behavioral Cloning: Fine-Tuning with Gradient Accumulation"}, "39": {"path": "/behavioural_cloning.py:36-60", "hash": "14e849fc25813f0ded1d4c322cc4f29f", "title": "Load and Configure Agent Object"}, "40": {"path": "/behavioural_cloning.py:62-91", "hash": "bde143bc62406e2cf1b038560a0dc294", "title": "Training Environment Setup"}, "41": {"path": "/behavioural_cloning.py:92-114", "hash": "5b9ce3197db446f4afbdc7db2f182ef4", "title": "Policy-Based Actor-Critic Environment Setup"}, "42": {"path": "/behavioural_cloning.py:115-139", "hash": "a590fe7a9cab9bf10dfe36017b6312bf", "title": "Training Policy Model with Behavioral Cloning"}, "43": {"path": "/behavioural_cloning.py:140-143", "hash": "c22affe7ae7ceef7ca1dd78d90f326f3", "title": "Command Line Parsing for Behavioral Cloning Training"}, "44": {"path": "/data_loader.py", "hash": "16b1c5a1f5c3232251e5cf4d95420b2b", "title": "Batch Data Loader with Processes"}, "45": {"path": "/data_loader.py:1-40", "hash": "7f1dce54ea9e427499d87a5f865db5b2", "title": "MineRL VPT Data Loader"}, "46": {"path": "/data_loader.py:41-67", "hash": "da416bc576629796d6e82fb3cb95715c", "title": "Video Game Annotation Extractor"}, "47": {"path": "/data_loader.py:68-91", "hash": "325ff7cddbc2a3a0f83cdc64b8b0e133", "title": "Scrollwheel-based Attack Unsticking"}, "48": {"path": "/data_loader.py:92-113", "hash": "3daba2326b04f5513c51597920e4b8c1", "title": "Stuck State Detection and Action Removal"}, "49": {"path": "/data_loader.py:114-134", "hash": "633744c5b24c2dbbb96e7fba47c8e7d9", "title": "Compositing and Resizing Cursors"}, "50": {"path": "/data_loader.py:135-155", "hash": "f03e4212831d24e4731cd0dcdef4d47e", "title": "Sub-Inefficient Sequence Loader"}, "51": {"path": "/data_loader.py:156-178", "hash": "1ab6b2ce9b69fd19613a1e1e869a1d2a", "title": "Video Data Loader for Shuffled Demonstrations"}, "52": {"path": "/data_loader.py:179-209", "hash": "d328b49f66706191df58e36e49e54b99", "title": "Batch Data Loading Workers"}, "53": {"path": "/data_loader.py:210-222", "hash": "f4fd374556da20232bb066120e407056", "title": "Batch Data Processor"}, "54": {"path": 
"/inverse_dynamics_model.py", "hash": "fa8f52cbd181af8f2d69d3e84997c1e6", "title": "IDMAgent: Minecraft Predictor"}, "55": {"path": "/inverse_dynamics_model.py:1-33", "hash": "1e85d3d45b4729fe65d37e9e5dba2577", "title": "IDMAgent: Minecraft Action Predictor"}, "56": {"path": "/inverse_dynamics_model.py:34-56", "hash": "c2e942bf92647f7a8e11d676f88931ad", "title": "Agent Initialization and Weight Loading"}, "57": {"path": "/inverse_dynamics_model.py:57-79", "hash": "caab0687eb32e7b1574a35f492fc9f8a", "title": "Policy to MineRL Action Conversion"}, "58": {"path": "/inverse_dynamics_model.py:80-95", "hash": "d4170e090e7fbeac6c63668e4605a745", "title": "Agent State Tracking and Action Prediction"}, "59": {"path": "/lib/action_head.py", "hash": "7af31eb75d821d02a2dafee7a9dc34c8", "title": "Abstract Action Head Class for RL Action Spaces"}, "60": {"path": "/lib/action_head.py:1-36", "hash": "542c11d9892b80d7fec97dd0dac4b760", "title": "Fan-In Initialized Action Heads"}, "61": {"path": "/lib/action_head.py:37-63", "hash": "17710b50770bd67a267f45f0a4473e44", "title": "Abstract Base Class for Action Heads"}, "62": {"path": "/lib/action_head.py:64-89", "hash": "356cc33bf35e81c3744d1f197ab30248", "title": "Orthogonal Initialization for Action Head"}, "63": {"path": "/lib/action_head.py:91-119", "hash": "74fc1c049aca7431a16b90583c84a5f9", "title": "Calculate Entropy, Z-Score, and KL Divergence"}, "64": {"path": "/lib/action_head.py:120-151", "hash": "a28fd3583dceda6f6affadcc494ff3fa", "title": "Categorical Action Head Initialization"}, "65": {"path": "/lib/action_head.py:152-174", "hash": "58db5d5aadf23bbe4e252a50c4134e6c", "title": "Action Head Class Definition and Forward Function"}, "66": {"path": "/lib/action_head.py:176-196", "hash": "6ee0db40993e23222844e36f563ca2aa", "title": "Functions for Action Distributions"}, "67": {"path": "/lib/action_head.py:197-217", "hash": "4afb70523bdbe2c6141f5e6340df6f1a", "title": "Gumbel-Softmax for Categorical Training"}, "68": {"path": "/lib/action_head.py:218-243", "hash": "ce927827517b2e39a637ac93f17b91f8", "title": "DictActionHead Class: Forward Pass and Parameter Reset"}, "69": {"path": "/lib/action_head.py:244-264", "hash": "049d01fbd1f8c350ec78e58ae0e68375", "title": "Action Head Class: Supporting Methods"}, "70": {"path": "/lib/action_head.py:265-275", "hash": "851a45c77a249cbffd9f44fa78b48f72", "title": "ActionHead Selection"}, "71": {"path": "/lib/action_mapping.py", "hash": "0bd8e1a787709909afc20607a57ea5cb", "title": "Action Mapping and Management"}, "72": {"path": "/lib/action_mapping.py:1-32", "hash": "0dda6c9a3b35e536039a506b4d7ae83c", "title": "Action Mapping Class for Minecraft Customization"}, "73": {"path": "/lib/action_mapping.py:33-64", "hash": "78a5e898bb97a921e614eb5fd78bf641", "title": "Abstract Base Class for Action Mapping"}, "74": {"path": "/lib/action_mapping.py:65-82", "hash": "898672ed758bd190c4777691bad5d9ba", "title": "Button Selection from Factored Action Space"}, "75": {"path": "/lib/action_mapping.py:83-97", "hash": "24a693ee2c0e85994af6e076799cb4f3", "title": "Priority-based Action Mapping for 'None' Button"}, "76": {"path": "/lib/action_mapping.py:98-122", "hash": "c6e9e7b0ba06bde30c8b0019720aa8ee", "title": "Action Mapping Classes for Video Games"}, "77": {"path": "/lib/action_mapping.py:124-142", "hash": "5b37aa376d8647e70a1672d50f842444", "title": "Camera Meta Actions Mapping"}, "78": {"path": "/lib/action_mapping.py:143-161", "hash": "9289ea43db0f965cdb1df90ca7dd1c14", "title": "Button and Camera Array Mapping Setup"}, 
"79": {"path": "/lib/action_mapping.py:162-180", "hash": "4f0950820c77a86f0c14e03bda848ce2", "title": "Factored Action-Camera Combinations"}, "80": {"path": "/lib/action_mapping.py:181-200", "hash": "3a8834404f06efff949309449ca1f492", "title": "Assertion Check and Action Mapping"}, "81": {"path": "/lib/action_mapping.py:201-223", "hash": "d672a24a0358d1196450d274966239cc", "title": "Factoring New Action Space"}, "82": {"path": "/lib/action_mapping.py:225-234", "hash": "9f4f1a765fff297a03ba7e59668384bf", "title": "Class with Three Methods Defined"}, "83": {"path": "/lib/actions.py", "hash": "535b24b72ef01b4e5b2dfd4514fc4eee", "title": "Action-based Quantization in Minecraft"}, "84": {"path": "/lib/actions.py:1-54", "hash": "729d8cc7c52f3467cb22391f432b1ce2", "title": "Minecraft Action and Camera Quantization"}, "85": {"path": "/lib/actions.py:55-68", "hash": "6d3dbd98a43de2e5903f998b9a76e4b8", "title": "Mu-law and Linear Quantization Schemes"}, "86": {"path": "/lib/actions.py:69-95", "hash": "a1ec97ac35096c762c7aeea18c190f9b", "title": "Discretize and Undiscretize Values with Class"}, "87": {"path": "/lib/actions.py:97-130", "hash": "145c3411ff155288d12b9c273c150adf", "title": "Action Transformer Class for MinerL Environment"}, "88": {"path": "/lib/actions.py:132-160", "hash": "b3c8003230409e998477dddcce3ea727", "title": "Functions: Item to Name Conversion, Dict to Numpy, and Numpy to Dict"}, "89": {"path": "/lib/actions.py:161-178", "hash": "9ea828fc1b7fcf27071058d704b66c76", "title": "Camera Array Transformations"}, "90": {"path": "/lib/impala_cnn.py", "hash": "9fb4fc00cd02052bf50aca9151de3c11", "title": "Customizable ImpalaCNN Architecture"}, "91": {"path": "/lib/impala_cnn.py:1-42", "hash": "85906f510197568564b8801143d0fe21", "title": "ImpalaCNN: Residual Basic Blocks"}, "92": {"path": "/lib/impala_cnn.py:43-78", "hash": "10eaff967714e92af2ae849845c1efd1", "title": "ImpalaCNN: 1D Conv Layers for Image Classification"}, "93": {"path": "/lib/impala_cnn.py:79-107", "hash": "1a61cbcf50361fe83f7febe1eef0d08c", "title": "ImpalaCNN: Customizable Convolutional Neural Network"}, "94": {"path": "/lib/impala_cnn.py:108-139", "hash": "23767fd0d2e918d78bf5b74df34461d0", "title": "ImpalaCNN Model Architecture"}, "95": {"path": "/lib/impala_cnn.py:140-171", "hash": "36b4b29826af1fd4e71c0c1519eddf4f", "title": "ImpalaCNN: Custom Convolutional Class"}, "96": {"path": "/lib/impala_cnn.py:172-195", "hash": "d387a23275fcea5bb73e2be867f06bfb", "title": "Stacked 2D Convolutional CNN Model"}, "97": {"path": "/lib/masked_attention.py", "hash": "194e0c1de170a38578b8e9b0b21acbe1", "title": "Masked Attention Mechanism for Time Series"}, "98": {"path": "/lib/masked_attention.py:1-31", "hash": "7872d01a7b7f1558226b47775353ca96", "title": "Band Diagonal Time Series Mask"}, "99": {"path": "/lib/masked_attention.py:32-54", "hash": "7ec082e7b7098438a5c7bbdfe89fc08e", "title": "Generate Boolean Mask for Masked Attention"}, "100": {"path": "/lib/masked_attention.py:55-73", "hash": "38b2f2f8a85dd33373d2fbad78606c2f", "title": "Update State Mask for Masked Attention"}, "101": {"path": "/lib/masked_attention.py:75-102", "hash": "f468c154765a3bb4d3c4e57642f6cb03", "title": "Dynamic Episode Masking in Transformer Layers"}, "102": {"path": "/lib/masked_attention.py:104-113", "hash": "7ff482e4fa5d0808cb0fc359d42daee3", "title": "Masked Attention Mechanism Parameters"}, "103": {"path": "/lib/masked_attention.py:114-147", "hash": "251d3bc90a7be5b9cb66746b3786ee19", "title": "Masked Attention Initialization"}, "104": {"path": 
"/lib/masked_attention.py:148-173", "hash": "aa80bb911e0a0fd3deff1d212aeab297", "title": "Masked Attention Class and Forward Pass"}, "105": {"path": "/lib/masked_attention.py:174-182", "hash": "3278a4ac024e8c0aa8ff500c67798258", "title": "Masked Attention Method"}, "106": {"path": "/lib/minecraft_util.py", "hash": "a1cc0dcf2f304e14dd8d4899a8e07ab3", "title": "Average Entropy Calculator"}, "107": {"path": "/lib/minecraft_util.py:1-32", "hash": "78d12a272e2d4dfe53db3720a5565f8e", "title": "Decorator for Storing Method Arguments"}, "108": {"path": "/lib/minecraft_util.py:34-55", "hash": "a4ace935d80b7eb408ebc4fa28e66967", "title": "Normalized Entropy Calculation"}, "109": {"path": "/lib/minecraft_util.py:56-79", "hash": "eb1843791cfa372d9c19ed4afb4ee805", "title": "Calculate Entropy of Gaussian Action Heads"}, "110": {"path": "/lib/minecraft_util.py:80-88", "hash": "34083658bb90469d3cf8780418d0b4ea", "title": "Average Entropy from DiagGaussianActionHead/DictActionHead"}, "111": {"path": "/lib/misc.py", "hash": "d6628f403ced15ba98ec6a825caf1758", "title": "Data Processing Tasks: Calculating Products and Reshaping Input Data"}, "112": {"path": "/lib/misc.py:1-43", "hash": "efbc8472be90b62b37cbf55d2417812a", "title": "Data Manipulation Functions"}, "113": {"path": "/lib/misc.py:44-89", "hash": "2ae18ebfe0ee19c05022f94f15ad1201", "title": "Transpose and Compose Functions"}, "114": {"path": "/lib/misc.py:90-118", "hash": "7906a283abc0eb561f11792a09c16f9d", "title": "Handling Int and Str Calculations in Function"}, "115": {"path": "/lib/misc.py:119-157", "hash": "9a14be737c4b10ca56800ad1b4719e17", "title": "Sort Symbols in List"}, "116": {"path": "/lib/misc.py:158-187", "hash": "dfe762a7cbaf4e5bff79350e81429a3a", "title": "Check and Update Product of List Values"}, "117": {"path": "/lib/misc.py:188-223", "hash": "b596b6231a675a4373be7cd94a287e7f", "title": "Shape Reshaping and Exception Handling"}, "118": {"path": "/lib/misc.py:224-240", "hash": "15112c3e1e9f1467d95277f0b2e2dcf7", "title": "Input Validation and Shape Inference"}, "119": {"path": "/lib/misc.py:241-263", "hash": "d14ebcd9f5a064c8f4a3c30a5a342838", "title": "Utility Functions in lib/misc.py:241-263"}, "120": {"path": "/lib/mlp.py", "hash": "4711ffc2d6f5a64a9069a961837cf443", "title": "MLP Class: Neural Network Architecture"}, "121": {"path": "/lib/normalize_ewma.py", "hash": "cf0d00802e6ef9cf8b0c49aeddfae7d3", "title": "Efficient Normalization with Debiased Means"}, "122": {"path": "/lib/normalize_ewma.py:1-28", "hash": "cc6672c0f2c5338681ce4b50c58b880d", "title": "Exponential Weighted Normalization Module"}, "123": {"path": "/lib/normalize_ewma.py:29-51", "hash": "0fdc785d54725e0cf8d19dc2036d9966", "title": "Debiased Mean and Variance Calculator"}, "124": {"path": "/lib/normalize_ewma.py:52-60", "hash": "3c8a1263aa1ea729587df2cc306417ab", "title": "Efficient Data Normalization and EWMA Calculation"}, "125": {"path": "/lib/policy.py", "hash": "ae26b75539d0261a20612229c76358fd", "title": "Minecraft Reinforcement Learning with PyTorch"}, "126": {"path": "/lib/policy.py:1-32", "hash": "5ea95d0c6c724d8a7854f0215e5503a4", "title": "Normalizing Images with ImgPreprocessing Class"}, "127": {"path": "/lib/policy.py:33-62", "hash": "2ed9700242e277d7eef94867d1ca9395", "title": "ImgObsPreprocessing Class"}, "128": {"path": "/lib/policy.py:63-91", "hash": "decddcb506467e3fe53388d82c12acf4", "title": "Policy Class with CNN and Layer Initialization"}, "129": {"path": "/lib/policy.py:92-122", "hash": "d3fe0b8931a2d416231541fb298c9d5d", "title": 
"Deep Reinforcement Learning Policy Initializer"}, "130": {"path": "/lib/policy.py:123-150", "hash": "c6351d3b4db9410cf7514726d83d6d91", "title": "Class Initialization and Attributes"}, "131": {"path": "/lib/policy.py:151-178", "hash": "7bb192d9bfe26da770fad0c949e6a331", "title": "Layer Normalization Setup"}, "132": {"path": "/lib/policy.py:179-208", "hash": "8e9695919947d265ab282109ca3616c8", "title": "Initializing Recurrent Module and Forward Pass"}, "133": {"path": "/lib/policy.py:209-238", "hash": "aa6901be3668589edb545a6b758c6157", "title": "MinecraftAgentPolicy Class Definition"}, "134": {"path": "/lib/policy.py:240-269", "hash": "b6f53641fc91c664b792927833c415d3", "title": "Neural Policy Decisions Class"}, "135": {"path": "/lib/policy.py:271-299", "hash": "bd36b0a7a06e00a6dd6ecf91fda7f8dd", "title": "Policy Network Functions"}, "136": {"path": "/lib/policy.py:300-323", "hash": "a9d9c3de561db0c7d496991888fd701c", "title": "Time Dimension Policy Evaluation"}, "137": {"path": "/lib/policy.py:324-357", "hash": "1ec979580cb31ba09f8043f8ec23b4e1", "title": "InverseActionNet: MinecraftPolicy Subclass"}, "138": {"path": "/lib/policy.py:358-386", "hash": "a36d72e518b6157a9e59de68e1bb275c", "title": "3D Convolution Layer Initialization"}, "139": {"path": "/lib/policy.py:388-424", "hash": "bbf4bbb591510dc81a39b23eb881aed1", "title": "Inverse Action Policy Class"}, "140": {"path": "/lib/policy.py:425-457", "hash": "4d5078db16ab6503c38e15b7983c9f62", "title": "Training RL Policy with PyTorch"}, "141": {"path": "/lib/policy.py:458-467", "hash": "4f00936d505d9c023367a9930f62f55b", "title": "Log Probability Action Check"}, "142": {"path": "/lib/scaled_mse_head.py", "hash": "8042cb9009f9822b2f14a09d9a9944e5", "title": "Scaled MSE Loss for Linear Layer"}, "143": {"path": "/lib/scaled_mse_head.py:1-35", "hash": "6b1bb37e8a657d12b5fa1a6ea38c575f", "title": "Scaled MSE Head Layer: Normalizing Linear Outputs"}, "144": {"path": "/lib/scaled_mse_head.py:37-50", "hash": "ffa32ac0ccd144b77da6785efe3b2622", "title": "Scaled MSE Loss with Normalization/Denormalization"}, "145": {"path": "/lib/torch_util.py", "hash": "113429be8af1e3dfc5fc522a3be8dce4", "title": "Efficient PyTorch Library Management"}, "146": {"path": "/lib/torch_util.py:1-57", "hash": "8f6059acf01bbde05b8c6f607d0cad37", "title": "Torch CUDA Utilities"}, "147": {"path": "/lib/torch_util.py:60-96", "hash": "234e5a0864eb7995ab93f3590355d262", "title": "Linear and LayerNorm Functions in Torch Utils"}, "148": {"path": "/lib/torch_util.py:97-131", "hash": "002203a8a7a4ff79c46b086717d52338", "title": "LayerNorm, Flatten, Sequential, Load Avg with Metadata"}, "149": {"path": "/lib/torch_util.py:132-165", "hash": "65ce4d6d9b2ad8b8a268b12dbdaac006", "title": "PyTorch Decorator for Model State Saving"}, "150": {"path": "/lib/torch_util.py:166-199", "hash": "f1b8d33010adc996e231787d84132191", "title": "PyTorch Tensor Data Type Parser"}, "151": {"path": "/lib/torch_util.py:200-200", "hash": "a5a547bcc4c9d2e1a9b94ab4788f3944", "title": "Squeeze Tensor Dimension"}, "152": {"path": "/lib/tree_util.py", "hash": "625abac6f11781891afdfc4d87c9f332", "title": "Tree Utilities for PyTree API"}, "153": {"path": "/lib/tree_util.py:1-35", "hash": "e4e3e2bfa15b3126a1a3c820d7b1a503", "title": "Utility Functions for Dictionary, Tuple, and List Manipulation"}, "154": {"path": "/lib/tree_util.py:36-70", "hash": "25329052d6e7e7649d7a56a93a617034", "title": "Safe List Operations and Tree Mapping"}, "155": {"path": "/lib/tree_util.py:71-93", "hash": 
"53c63a09ecae6e07d88e4dfa272a45a0", "title": "Multi-Input Function Applied to Pytree Leaves"}, "156": {"path": "/lib/tree_util.py:95-119", "hash": "f31c286c204a255e6a2485fd59d0e112", "title": "Tree Node Type Determination and Iteration"}, "157": {"path": "/lib/tree_util.py:120-141", "hash": "d2e14dae3afe8da5df820635f5a1308e", "title": "Validate Tree Structure in Python"}, "158": {"path": "/lib/tree_util.py:142-171", "hash": "966f58321fbef5235481571ea311705f", "title": "Tree Utilities: Building and Unflattening Trees"}, "159": {"path": "/lib/tree_util.py:172-201", "hash": "0f8d79dbe27cdec28dd78a410c2f22b6", "title": "PyTreeDef: Tree Nodes Definition and Operations"}, "160": {"path": "/lib/tree_util.py:203-256", "hash": "9f9ac37d63d0dd5ba89030d6f7fdceb4", "title": "Tree-like Object Serialization Class"}, "161": {"path": "/lib/tree_util.py:259-280", "hash": "bc0e6661f9bbe8f75ce33ed69787b0b3", "title": "Serializing Complex Data Types with PyTree API"}, "162": {"path": "/lib/util.py", "hash": "4e21f48fcd62b3ebec30b393e487309a", "title": "Neural Network Utilities"}, "163": {"path": "/lib/util.py:1-30", "hash": "3330e6aec16f2a2ec9afbd7b57eada60", "title": "Recursive Module Key Collection and Fan-in ReLU Layer"}, "164": {"path": "/lib/util.py:31-62", "hash": "45931712719df1dc656017d17bbda4f6", "title": "Dynamic Normalization Function Initialization"}, "165": {"path": "/lib/util.py:64-94", "hash": "ed09680edf59de33683b6f66a88bb387", "title": "Residual Recurrent Blocks for Neural Network Stability"}, "166": {"path": "/lib/util.py:95-126", "hash": "48928a37d35211208abb6217d9c071f9", "title": "ResidualRecurrentBlock Class"}, "167": {"path": "/lib/util.py:127-161", "hash": "de221eb02380d8de0f34fb0da8fefd04", "title": "Residual Recurrent Block Initialization"}, "168": {"path": "/lib/util.py:162-184", "hash": "16bc40801c3351aa674c94190b5b9b55", "title": "Layer Normalization and MLP Initialization"}, "169": {"path": "/lib/util.py:185-216", "hash": "5fa40e35b97127225066bab432cb717b", "title": "Transformer Block Recurrent Pass"}, "170": {"path": "/lib/util.py:217-253", "hash": "b953b83e301b7ac3941b7207d72191f6", "title": "Reversing LSTM Input Sequences"}, "171": {"path": "/lib/util.py:254-276", "hash": "77dda5342e28ecbae5b3854f27ade036", "title": "Band-Repeat Normalization"}, "172": {"path": "/lib/xf.py", "hash": "8e7b332087a17e58b4d48435a0bfe5e6", "title": "Strided Attention Mechanism for Transformers"}, "173": {"path": "/lib/xf.py:1-43", "hash": "c073f7f87fcf768fe99ed9b211fadd17", "title": "Softmax Attention Function"}, "174": {"path": "/lib/xf.py:44-68", "hash": "81fbe2c22964622eb005cdbcef5e7452", "title": "Multi-Head Attention Logit Calculation"}, "175": {"path": "/lib/xf.py:69-107", "hash": "593b4b0c82f019183bcfc23bdb4bf056", "title": "Attention Mechanism Class and Split Heads Function"}, "176": {"path": "/lib/xf.py:108-138", "hash": "38a6f4a2a862d129f615fe786d2988e0", "title": "XF Class Initialization and Preprocessing"}, "177": {"path": "/lib/xf.py:141-163", "hash": "2495bf9a78ca11d88b21a81980974eb0", "title": "StridedAttn: Preprocessing and Initialization"}, "178": {"path": "/lib/xf.py:164-192", "hash": "acd03f60e561c82698b0282c297a846c", "title": "Data Preprocessing for Deep Learning Model"}, "179": {"path": "/lib/xf.py:193-215", "hash": "69d37151684e3884e68210c0f9cae5fd", "title": "Transformer Preprocessing Function"}, "180": {"path": "/lib/xf.py:216-254", "hash": "4f91631e1997f9012006afb21ce55943", "title": "Attention Layer Base Class"}, "181": {"path": "/lib/xf.py:255-291", "hash": 
"9019c1e912c0e5db48937207261eae8b", "title": "Self-Attention Layer Implementation"}, "182": {"path": "/lib/xf.py:292-331", "hash": "3dd45a469d150e070a4b9f8adaa5b487", "title": "Attention Module Constructor"}, "183": {"path": "/lib/xf.py:332-359", "hash": "6407f4a805abf53423486081a78e62a6", "title": "Attention Residual Class"}, "184": {"path": "/lib/xf.py:360-382", "hash": "f6eba634c5ed7d3d37a1b2182d882d13", "title": "Neural Network Forward, Stateless Forward & State Update Functions"}, "185": {"path": "/lib/xf.py:383-414", "hash": "2db535c4d8ff1b598ed3ae1537588d4c", "title": "Residual MLP Layer Initialization"}, "186": {"path": "/lib/xf.py:415-450", "hash": "0e5f5432a3d448c626831ed76aeb38d2", "title": "ResNet Multi-Scale Module"}, "187": {"path": "/lib/xf.py:451-479", "hash": "c400dd2387946fc7c6e8647df097e15c", "title": "Split-Apply-Combine with Multiscale Layers"}, "188": {"path": "/requirements.txt", "hash": "0fa2edc6f9b06a901f5e97f98d7d0c84", "title": "PyTorch, Gym, attrs, OpenCV Installation Guide"}, "189": {"path": "/run_agent.py", "hash": "2617f4cdf6936614d2e17bfa647bb460", "title": "Run Agent Model"}, "190": {"path": "/run_agent.py:1-30", "hash": "1feeff9c7bb3457fac4f53429d8a677e", "title": "Train MineRL Agent with Pre-loaded Model"}, "191": {"path": "/run_agent.py:31-35", "hash": "6ffa7a73d52744d99d6dbb8262ae7bf5", "title": "Argument Parser Model File Loading"}, "192": {"path": "/run_inverse_dynamics_model.py", "hash": "07bf0334f978079c5efb025263736a64", "title": "Inverse Dynamics Model Game Engine"}, "193": {"path": "/run_inverse_dynamics_model.py:1-36", "hash": "bb7b5dc5b3a87b673f66d4297ea4528f", "title": "Key-Action Mapping Dictionary for Game Controls"}, "194": {"path": "/run_inverse_dynamics_model.py:37-86", "hash": "8b0a326a7ad0fbd90960e5d50ff854e7", "title": "Inverse Dynamics Model Training"}, "195": {"path": "/run_inverse_dynamics_model.py:87-112", "hash": "5fbe7ba4ba6d35cbc8e9ece32c604956", "title": "Reset Camera Action & Input Handler"}, "196": {"path": "/run_inverse_dynamics_model.py:114-143", "hash": "0088ec5b7e15efb9549727a9bff97658", "title": "Loading Agent and Capturing Video Input"}, "197": {"path": "/run_inverse_dynamics_model.py:144-170", "hash": "c7200100cae75fe5aab2b429b5447585", "title": "Inverse Dynamics Model Execution"}, "198": {"path": "/run_inverse_dynamics_model.py:171-197", "hash": "27221056654463726616621a4dd8f922", "title": "Real-Time IDM Model Predictions on Videos"}, "199": {"path": "/run_inverse_dynamics_model.py:198-205", "hash": "3a9b72a25856ca86ba216c62fd123b08", "title": "Command Line Arguments Parser"}}}
\ No newline at end of file
diff --git a/docs/codeview.html b/docs/codeview.html
new file mode 100644
index 0000000..761065f
--- /dev/null
+++ b/docs/codeview.html
@@ -0,0 +1,669 @@
+ Code View
\ No newline at end of file
diff --git a/docs/data/0.json b/docs/data/0.json
new file mode 100644
index 0000000..cd462a4
--- /dev/null
+++ b/docs/data/0.json
@@ -0,0 +1,548 @@
+{
+ "0": {
+ "file_id": 0,
+ "content": "/README.md",
+ "type": "filepath"
+ },
+ "1": {
+ "file_id": 0,
+ "content": "The comments discuss Minecraft AI model training, reinforcement learning features, and limited resource data collection for the MineRL BASALT 2022 competition, as well as building a house in under 10 minutes without harming villages.",
+ "type": "summary"
+ },
+ "2": {
+ "file_id": 0,
+ "content": "# Video-Pre-Training\nVideo PreTraining (VPT): Learning to Act by Watching Unlabeled Online Videos\n> :page_facing_up: [Read Paper](https://cdn.openai.com/vpt/Paper.pdf) \\\n :mega: [Blog Post](https://openai.com/blog/vpt) \\\n :space_invader: [MineRL Environment](https://github.com/minerllabs/minerl) (note version 1.0+ required) \\\n :checkered_flag: [MineRL BASALT Competition](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition)\n# Running agent models\nInstall pre-requirements for [MineRL](https://minerl.readthedocs.io/en/latest/tutorials/index.html).\nThen install requirements with:\n```\npip install git+https://github.com/minerllabs/minerl\npip install -r requirements.txt\n```\nTo run the code, call\n```\npython run_agent.py --model [path to .model file] --weights [path to .weight file]\n```\nAfter loading up, you should see a window of the agent playing Minecraft.\n# Agent Model Zoo\nBelow are the model files and weights files for various pre-trained Minecraft models.\nThe 1x, 2x and 3x model files correspond to their respective model weights width.",
+ "type": "code",
+ "location": "/README.md:3-35"
+ },
+ "3": {
+ "file_id": 0,
+ "content": "This code provides instructions for setting up and running the Video PreTraining (VPT) model, which learns to act by watching unlabeled online videos. It also includes a link to the paper describing the methodology and provides information on where to find more resources related to VPT. The code includes commands to install pre-requisites, requirements, and run the agent models. Additionally, it lists various pre-trained Minecraft models with their respective model files and weights.",
+ "type": "comment"
+ },
+ "4": {
+ "file_id": 0,
+ "content": "* [:arrow_down: 1x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.model)\n* [:arrow_down: 2x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/2x.model)\n* [:arrow_down: 3x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.model)\n### Demonstration Only - Behavioral Cloning\nThese models are trained on video demonstrations of humans playing Minecraft\nusing behavioral cloning (BC) and are more general than later models which\nuse reinforcement learning (RL) to further optimize the policy.\nFoundational models are trained across all videos in a single training run\nwhile house and early game models refine their respective size foundational\nmodel further using either the housebuilding contractor data or early game video\nsub-set. See the paper linked above for more details.\n#### Foundational Model :chart_with_upwards_trend:\n * [:arrow_down: 1x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.weights)",
+ "type": "code",
+ "location": "/README.md:37-51"
+ },
+ "5": {
+ "file_id": 0,
+ "content": "This code provides links to download pre-trained models for Minecraft reinforcement learning, trained using behavioral cloning on video demonstrations. Foundational models are trained across all videos in a single training run, while house and early game models refine further with specific data sets.",
+ "type": "comment"
+ },
+ "6": {
+ "file_id": 0,
+ "content": " * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-2x.weights)\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.weights)\n#### Fine-Tuned from House :chart_with_upwards_trend:\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-house-3x.weights)\n#### Fine-Tuned from Early Game :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-2x.weights)\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-3x.weights)\n### Models With Environment Interactions\nThese models further refine the above demonstration based models with a reward\nfunction targeted at obtaining diamond pickaxes. While less general then the behavioral\ncloning models, these models have the benefit of interacting with the environment",
+ "type": "code",
+ "location": "/README.md:52-65"
+ },
+ "7": {
+ "file_id": 0,
+ "content": "This code provides links to pre-trained models for Minecraft gameplay and mentions fine-tuned models from specific starting points. Additionally, it introduces models with environment interactions that are trained using a reward function aimed at obtaining diamond pickaxes.",
+ "type": "comment"
+ },
+ "8": {
+ "file_id": 0,
+ "content": "using a reward function and excel at progressing through the tech tree quickly.\nSee the paper for more information\non how they were trained and the exact reward schedule.\n#### RL from Foundation :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-foundation-2x.weights)\n#### RL from House :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-house-2x.weights)\n#### RL from Early Game :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-early-game-2x.weights)\n# Running Inverse Dynamics Model (IDM)\nIDM aims to predict what actions player is taking in a video recording.\nSetup:\n* Install requirements: `pip install -r requirements.txt`\n* Download the IDM model [.model :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.model) and [.weight :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.weights) files",
+ "type": "code",
+ "location": "/README.md:66-85"
+ },
+ "9": {
+ "file_id": 0,
+ "content": "This code provides pre-trained models for Minecraft gameplay using reinforcement learning and an Inverse Dynamics Model (IDM). The models are trained with different reward functions and excel at progressing quickly through the tech tree. Users can download the models and weights from specified URLs to run the IDM, which predicts player actions in a video recording.",
+ "type": "comment"
+ },
+ "10": {
+ "file_id": 0,
+ "content": "* For demonstration purposes, you can use the contractor recordings shared below to. For this demo we use\n [this .mp4](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4)\n and [this associated actions file (.jsonl)](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl).\nTo run the model with above files placed in the root directory of this code:\n```\npython run_inverse_dynamics_model.py -weights 4x_idm.weights --model 4x_idm.model --video-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4 --jsonl-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl\n```\nA window should pop up which shows the video frame-by-frame, showing the predicted and true (recorded) actions side-by-side on the left.\nNote that `run_inverse_dynamics_model.py` is designed to be a demo of the IDM, not code to put it into practice.\n# Using behavioural cloning to fine-tune the models",
+ "type": "code",
+ "location": "/README.md:86-99"
+ },
+ "11": {
+ "file_id": 0,
+ "content": "This code provides a demonstration of the Inverse Dynamics Model (IDM) using provided contractor recordings for video and actions. The model is run with specific file paths, and a window displays the predicted and true actions side-by-side. Note that this script is designed for demonstration purposes only, not for practical implementation.",
+ "type": "comment"
+ },
+ "12": {
+ "file_id": 0,
+ "content": "**Disclaimer:** This code is a rough demonstration only and not an exact recreation of what original VPT paper did (but it contains some preprocessing steps you want to be aware of)! As such, do not expect replicate the original experiments with this code. This code has been designed to be run-able on consumer hardware (e.g., 8GB of VRAM).\nSetup:\n* Install requirements: `pip install -r requirements.txt`\n* Download `.weights` and `.model` file for model you want to fine-tune.\n* Download contractor data (below) and place the `.mp4` and `.jsonl` files to the same directory (e.g., `data`). With default settings, you need at least 12 recordings.\nIf you downloaded the \"1x Width\" models and placed some data under `data` directory, you can perform finetuning with\n```\npython behavioural_cloning.py --data-dir data --in-model foundation-model-1x.model --in-weights foundation-model-1x.weights --out-weights finetuned-1x.weights\n```\nYou can then use `finetuned-1x.weights` when running the agent. You can change the training settings at the top of `behavioural_cloning.py`.",
+ "type": "code",
+ "location": "/README.md:101-114"
+ },
+ "13": {
+ "file_id": 0,
+ "content": "This code is a demonstration of video pre-training using behavioral cloning. It requires installing dependencies, downloading model and data files, and then fine-tuning the model with custom weights for better performance.",
+ "type": "comment"
+ },
+ "14": {
+ "file_id": 0,
+ "content": "Major limitations:\n- Only trains single step at the time, i.e., errors are not propagated through timesteps.\n- Computes gradients one sample at a time to keep memory use low, but also slows down the code.\n# Contractor Demonstrations\n### Versions\nOver the course of the project we requested various demonstrations from contractors\nwhich we release as index files below. In general, major recorder versions change for a new\nprompt or recording feature while bug-fixes were represented as minor version changes.\nHowever, some\nrecorder versions we asked contractors to change their username when recording particular\nmodalities. Also, as contractors internally ask questions, clarification from one contractor may\nresult in a behavioral change in the other contractor. It is intractable to share every contractor's\nview for each version, but we've shared the prompts and major clarifications for each recorder\nversion where the task changed significantly.\n \n Initial Prompt \n We are collect",
+ "type": "code",
+ "location": "/README.md:116-136"
+ },
+ "15": {
+ "file_id": 0,
+ "content": "This code is a README.md file that lists the major limitations of the pre-training, describes a contractor demonstrations section, and provides details about the versions used for this project. It also mentions how different versions were used to change modalities or prompt changes due to contractor's internal questions and clarifications.",
+ "type": "comment"
+ },
+ "16": {
+ "file_id": 0,
+ "content": "ing data for training AI models in Minecraft. You'll need to install java, download the modified version of minecraft (that collects and uploads your play data), and play minecraft survival mode! Paid per hour of gameplay. Prior experience in minecraft not. necessary. We do not collect any data that is unrelated to minecraft from your computer.\n \nThe following is a list of the available versions:\n* **6.x** Core recorder features subject to change [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_6xx_Jun_29.json)\n * 6.9 First feature complete recorder version\n * 6.10 Fixes mouse scaling on Mac when gui is open\n * 6.11 Tracks the hotbar slot\n * 6.13 Sprinting, swap-hands, ... (see commits below)\n \n Commits \n * improve replays that are cut in the middle of gui; working on riding boats / replays cut in the middle of a run\n * improve replays by adding dwheel action etc, also, loosen up replay tolerances\n * opencv version bump",
+ "type": "code",
+ "location": "/README.md:136-152"
+ },
+ "17": {
+ "file_id": 0,
+ "content": "This code is a list of available versions for the Minecraft AI model training program. It includes a description of the features in each version and provides links to download the modified Minecraft version for training.",
+ "type": "comment"
+ },
+ "18": {
+ "file_id": 0,
+ "content": " * add swap hands, and recording of the step timestamp\n * implement replaying from running and sprinting and tests\n * do not record sprinting (can use stats for that)\n * check for mouse button number, ignore >2\n * handle the errors when mouse / keyboard are recorded as null\n \n* **7.x** Prompt changes [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_7xx_Apr_6.json)\n * 7.6 Bump version for internal tracking\n \n Additional ask to contractors \n Right now, early game data is especially valuable to us. As such, we request that at least half of the data you upload is from the first 30 minutes of the game. This means that, for every hour of gameplay you spend in an older world, we ask you to play two sessions in which you create a new world and play for 30 minutes. You can play for longer in these worlds, but only the first 30 minutes counts as early game data.\n \n* **8.x** :clipboard",
+ "type": "code",
+ "location": "/README.md:153-168"
+ },
+ "19": {
+ "file_id": 0,
+ "content": "This code seems to be part of a README file for a Minecraft-based reinforcement learning project. The comments suggest that the developers are working on various features such as swapping hands and implementing replaying from running and sprinting. They also mention not recording sprinting data, checking for mouse button numbers, and handling errors related to recording mouse or keyboard input as null. Additionally, there is a note about prompting contractors to collect early game data (first 30 minutes) in newer worlds to help improve the AI's performance in those stages of the game.",
+ "type": "comment"
+ },
+ "20": {
+ "file_id": 0,
+ "content": ": House Building from Scratch Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_8xx_Jun_29.json)\n \n Changes and Prompt \n Hi all! Thank you for your hard work so far.\n This week we would like to have you all collect data on a specific task.\n This comes with a new recorder version 8.0 which you will need to update your recording script to download.\n This week we would like you to use a new world each time you play, so loading existing worlds is disabled.\n The new task is as follows:\n Starting in a new world, build a simple house in 10-15 minutes. This corresponds to one day and a bit of the night. Please use primarily wood, dirt, and sand, as well as crafted wood items such as doors, fences, ect. in constructing your house. Avoid using difficult items such as stone. Aside from those constraints, you may decorate the structure you build as you wish. It does not need to have any specific furniture. For example, it is ",
+ "type": "code",
+ "location": "/README.md:168-182"
+ },
+ "21": {
+ "file_id": 0,
+ "content": "This code is providing instructions for a new task, \"Building a Simple House\". The task involves building a house using primarily wood, dirt, and sand, along with crafted wood items. The structure can be decorated as desired but should not use difficult materials such as stone. It also provides information about the need to update the recording script and use a new world each time.",
+ "type": "comment"
+ },
+ "22": {
+ "file_id": 0,
+ "content": "OK if there is no bed in your house. If you have not finished the house by the sunrise (20 minutes) please exit and continue to another demonstration. Please continue to narrate what you are doing while completing this task.\n Since you will be unable to resume building after exiting Minecraft or going back to the main menu, you must finish these demonstrations in one session. Pausing via the menu is still supported. If you want to view your creations later, they will be saved locally so you can look at them in your own time. We may use these save files in a future task so if you have space, please leave the save files titled “build-house-15-min-“.\n For this week try to avoid all cobblestone / stone / granite\n For this week we just want simple houses without sleeping. If 10 minutes is too short, let us know and we can think of how to adjust!\n Stone tools are ok but I think you may run-out of time\n Changes:\n * Timer ends episode after 10 realtime minutes\n * Worlds are named: `\"build-house-15-min-\" + Math.abs(random.nextInt());`",
+ "type": "code",
+ "location": "/README.md:182-194"
+ },
+ "23": {
+ "file_id": 0,
+ "content": "This code is for a Minecraft demonstration task where the player must build a house within 20 minutes. If not finished, they should exit and continue with another demo. Narration is required throughout. The demonstrations need to be completed in one session without resuming after exiting or going back to the main menu. Save files will be saved locally for viewing later but may be used for future tasks if space permits. The task requires a simple house without sleep areas and avoids cobblestone, stone, and granite. Stone tools are acceptable but time-limited. The episode ends after 10 realtime minutes. World names follow the format \"build-house-15-min-\" + random number.",
+ "type": "comment"
+ },
+ "24": {
+ "file_id": 0,
+ "content": " \n * Note this version introduces 10-minute timer that ends the episode. It\n cut experiments short occasionally and was fixed in 9.1\n * 8.0 Simple House\n * 8.2 Update upload script\n* **9.x** :clipboard: House Building from Random Starting Materials Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_9xx_Jun_29.json)\n \n Changes and Prompt \n You now will have 10 minutes to use the provided resources to build your house / home / or structure. In this version, the experiment will time out after 10 minutes if you are not complete so don't be alarmed if that happens, it is intentional.\n No need to use up all the resources! It's ok to collect a few things but spend the majority of the time placing blocks (the act of placing seems to be harder to learn)\n Changes:\n * Worlds are named: `\"design-house-10-min-\" + Math.abs(random.nextInt());`\n * Starting inventory given by code below\n \n ",
+ "type": "code",
+ "location": "/README.md:196-215"
+ },
+ "25": {
+ "file_id": 0,
+ "content": "Introduces 10-minute timer for task completion.",
+ "type": "comment"
+ },
+ "26": {
+ "file_id": 0,
+ "content": " Random Starting Inventory Code \n ```java\n Random random = new Random();\n List hotbar = new ArrayList<>();\n List inventory = new ArrayList<>();\n // Ensure we give the player the basic tools in their hot bar\n hotbar.add(new ItemStack(Items.STONE_AXE));\n hotbar.add(new ItemStack(Items.STONE_PICKAXE));\n hotbar.add(new ItemStack(Items.STONE_SHOVEL));\n hotbar.add(new ItemStack(Items.CRAFTING_TABLE));\n // Add some random items to the player hotbar as well\n addToList(hotbar, inventory, Items.TORCH, random.nextInt(16) * 2 + 2);\n // Next add main building blocks\n if (random.nextFloat() < 0.7) {\n addToList(hotbar, inventory, Items.OAK_FENCE_GATE, random.nextInt(5));\n addToList(hotbar, inventory, Items.OAK_FENCE, random.nextInt(5) * 64);\n addToList(hotbar, inventory, Items.OAK_DOOR, random.nextInt(5));\n addToList(hotbar, inventory, Items.OAK_TRAPDOOR, random.nextInt(2) * 2);",
+ "type": "code",
+ "location": "/README.md:216-237"
+ },
+ "27": {
+ "file_id": 0,
+ "content": "This code generates a random starting inventory for the player by adding basic tools, some random items, and building blocks to their hotbar and inventory.",
+ "type": "comment"
+ },
+ "28": {
+ "file_id": 0,
+ "content": " addToList(hotbar, inventory, Items.OAK_PLANKS, random.nextInt(3) * 64 + 128);\n addToList(hotbar, inventory, Items.OAK_SLAB, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.OAK_STAIRS, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.OAK_LOG, random.nextInt(2) * 32);\n addToList(hotbar, inventory, Items.OAK_PRESSURE_PLATE, random.nextInt(5));\n } else {\n addToList(hotbar, inventory, Items.BIRCH_FENCE_GATE, random.nextInt(5));\n addToList(hotbar, inventory, Items.BIRCH_FENCE, random.nextInt(5) * 64);\n addToList(hotbar, inventory, Items.BIRCH_DOOR, random.nextInt(5));\n addToList(hotbar, inventory, Items.BIRCH_TRAPDOOR, random.nextInt(2) * 2);\n addToList(hotbar, inventory, Items.BIRCH_PLANKS, random.nextInt(3) * 64 + 128);\n addToList(hotbar, inventory, Items.BIRCH_SLAB, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.BIRCH_STAIRS, random.nextInt(3) * 64);",
+ "type": "code",
+ "location": "/README.md:238-250"
+ },
+ "29": {
+ "file_id": 0,
+ "content": "This code randomly selects items to add to the hotbar based on the type of biome the player is in. It uses different lists of items for oak and birch biomes.",
+ "type": "comment"
+ },
+ "30": {
+ "file_id": 0,
+ "content": " addToList(hotbar, inventory, Items.BIRCH_LOG, random.nextInt(2) * 32);\n addToList(hotbar, inventory, Items.BIRCH_PRESSURE_PLATE, random.nextInt(5));\n }\n // Now add some random decoration items to the player inventory\n addToList(hotbar, inventory, Items.CHEST, random.nextInt(3));\n addToList(hotbar, inventory, Items.FURNACE, random.nextInt(2) + 1);\n addToList(hotbar, inventory, Items.GLASS_PANE, random.nextInt(5) * 4);\n addToList(hotbar, inventory, Items.WHITE_BED, (int) (random.nextFloat() + 0.2)); // Bed 20% of the time\n addToList(hotbar, inventory, Items.PAINTING, (int) (random.nextFloat() + 0.1)); // Painting 10% of the time\n addToList(hotbar, inventory, Items.FLOWER_POT, (int) (random.nextFloat() + 0.1) * 4); // 4 Flower pots 10% of the time\n addToList(hotbar, inventory, Items.OXEYE_DAISY, (int) (random.nextFloat() + 0.1) * 4); // 4 Oxeye daisies 10% of the time\n addToList(hotbar, inventory, Items.POPPY, (int) (random.nextFloat() + 0.1) * 4); // 4 Poppies 10% of the time",
+ "type": "code",
+ "location": "/README.md:251-263"
+ },
+ "31": {
+ "file_id": 0,
+ "content": "This code is adding a variety of items to the player's inventory. It uses random number generation to decide how many of each item to add, with some items having a higher chance of appearing than others (e.g., beds have a 20% chance). This helps create a diverse and unpredictable inventory for the player to work with.",
+ "type": "comment"
+ },
+ "32": {
+ "file_id": 0,
+ "content": " addToList(hotbar, inventory, Items.SUNFLOWER, (int) (random.nextFloat() + 0.1) * 4); // 4 Sunflowers 10% of the time\n // Shuffle the hotbar slots and inventory slots\n Collections.shuffle(hotbar);\n Collections.shuffle(inventory);\n // Give the player the items\n this.mc.getIntegratedServer().getPlayerList().getPlayers().forEach(p -> {\n if (p.getUniqueID().equals(this.getUniqueID())) {\n hotbar.forEach(p.inventory::addItemStackToInventory);\n inventory.forEach(p.inventory::addItemStackToInventory);\n }\n });\n ```\n \n * 9.0 First version\n * 9.1 Fixed timer bug\n* **10.0** :clipboard: Obtain Diamond Pickaxe Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_10xx_Jun_29.json)\n \n Changes and Prompt \n Prompt:\n For this new task we have given you 20 minutes to craft a diamond pickaxe. We ask that you do not try to search for vi",
+ "type": "code",
+ "location": "/README.md:264-288"
+ },
+ "33": {
+ "file_id": 0,
+ "content": "This code adds 4 sunflowers to the player's inventory 10% of the time, shuffles both the hotbar and inventory slots, and then gives the player the items if they are the same as the current player.",
+ "type": "comment"
+ },
+ "34": {
+ "file_id": 0,
+ "content": "llages or other ways of getting diamonds, but if you are spawned in view of one, or happen to fall into a cave structure feel free to explore it for diamonds.\n If 20 min is not enough that is OK. It will happen on some seeds because of bad luck. Please do not use glitches to find the diamonds.\n Changes:\n * change to 20 minute time limit\n * _don't count gui time as part of the time limit_\n * World are named `\"collect-diamond-pickaxe-15min-\" + Math.abs(random.nextInt());`\n \nSometimes we asked the contractors to signify other tasks besides changing the version. This\nprimarily occurred in versions 6 and 7 as 8, 9 and 10 are all task specific.\n\nPrompt to contractors (click to show) \nAnother request about additional time - please use some of it to chop trees. Specifically, please start the recorder by adding --username treechop argument to the script (i.e. use play --username treechop on windows, ./play.sh --username treechop on osx/linux), and spend some time",
+ "type": "code",
+ "location": "/README.md:288-304"
+ },
+ "35": {
+ "file_id": 0,
+ "content": "Code discusses time limits for finding diamonds in Minecraft, changing version numbers, and requesting contractors to chop trees while recording gameplay with a specific username.",
+ "type": "comment"
+ },
+ "36": {
+ "file_id": 0,
+ "content": " chopping trees! Getting wooden or stone tools is ok, but please spend the majority of the with username treechop specifically chopping. I did it myself for about 15 minutes, and it does get boring pretty quickly, so I don't expect you to do it all the time, but please do at least a little bit of chopping. Feel free to play normally the rest of the time (but please restart without --username treechop argument when you are not chopping)\nHowever, it is preferable that you start a new world though, and use only the tools that are easily obtainable in that world. I'll see what I can do about getting player an iron axe - that sounds reasonable, and should not be hard, but will require a code update.\n \n### Environment\nWe restrict the contractors to playing Minecraft in windowed mode at 720p which we downsample at 20hz to 360p\nto minimize space. We also disabled the options screen to prevent the contractor from\nchanging things such as brightness, or rendering options. We ask contractors not to press keys",
+ "type": "code",
+ "location": "/README.md:304-311"
+ },
+ "37": {
+ "file_id": 0,
+ "content": "The code is providing instructions for the video game Minecraft, asking testers to spend a portion of their time chopping trees and using easily obtainable tools in a new world. It also mentions restrictions on the contractor's environment, such as windowed mode, downsampling, and disabling options like brightness and rendering settings.",
+ "type": "comment"
+ },
+ "38": {
+ "file_id": 0,
+ "content": "such as f3 which shows a debug overlay, however some contractors may still do this.\n### Data format\nDemonstrations are broken up into up to 5 minute segments consisting of a series of\ncompressed screen observations, actions, environment statistics, and a checkpoint\nsave file from the start of the segment. Each relative path in the index will\nhave all the files for that given segment, however if a file was dropped while\nuploading, the corresponding relative path is not included in the index therefore\nthere may be missing chunks from otherwise continuous demonstrations.\nIndex files are provided for each version as a json file:\n```json\n{\n \"basedir\": \"https://openaipublic.blob.core.windows.net/data/\",\n \"relpaths\": [\n \"8.0/cheeky-cornflower-setter-74ae6c2eae2e-20220315-122354\",\n ...\n ]\n}\n```\nRelative paths follow the following format:\n* `/---`\n> Note that due to network errors, some segments may be missing from otherwise\ncontinuous demonstrations.",
+ "type": "code",
+ "location": "/README.md:312-338"
+ },
+ "39": {
+ "file_id": 0,
+ "content": "The code provides information about the format of demonstrations, which are broken into 5-minute segments containing compressed screen observations, actions, environment statistics, and checkpoint save files. It also mentions that there may be missing chunks from continuous demonstrations due to network errors. The index files for each version are provided as JSON files with a list of relative paths following a specific format.",
+ "type": "comment"
+ },
+ "40": {
+ "file_id": 0,
+ "content": "Your data loader can then find following files:\n* Video observation: `/.mp4`\n* Action file: `/.jsonl`\n* Options file: `/-options.json`\n* Checkpoint save file: `/.zip`\nThe action file is **not** a valid json object: each line in\naction file is an individual action dictionary.\nFor v7.x, the actions are in form\n```json\n{\n \"mouse\": {\n \"x\": 274.0,\n \"y\": 338.0,\n \"dx\": 0.0,\n \"dy\": 0.0,\n \"scaledX\": -366.0,\n \"scaledY\": -22.0,\n \"dwheel\": 0.0,\n \"buttons\": [],\n \"newButtons\": []\n },\n \"keyboard\": {\n \"keys\": [\n \"key.keyboard.a\",\n \"key.keyboard.s\"\n ],\n \"newKeys\": [],\n \"chars\": \"\"\n },\n \"isGuiOpen\": false,\n \"isGuiInventory\": false,\n \"hotbar\": 4,\n \"yaw\": -112.35006,\n \"pitch\": 8.099996,\n \"xpos\": 841.364694513396,\n \"ypos\": 63.0,\n \"zpos\": 24.956354839537802,\n \"tick\": 0,\n \"milli\": 1649575088006,\n \"inventory\": [\n {\n \"type\": \"oak_door\",\n \"quantity\": 3\n },\n {\n \"type\": \"oak_planks\",\n \"quantity\": 59",
+ "type": "code",
+ "location": "/README.md:340-388"
+ },
+ "41": {
+ "file_id": 0,
+ "content": "The code defines the structure of files required by the data loader, including video observation file, action file, options file, and checkpoint save file. The action file contains a list of individual action dictionaries in JSON format, each representing mouse and keyboard inputs, game status, player position, and inventory for a specific tick or frame.",
+ "type": "comment"
+ },
+ "42": {
+ "file_id": 0,
+ "content": " },\n {\n \"type\": \"stone_pickaxe\",\n \"quantity\": 1\n },\n {\n \"type\": \"oak_planks\",\n \"quantity\": 64\n }\n ],\n \"serverTick\": 6001,\n \"serverTickDurationMs\": 36.3466,\n \"stats\": {\n \"minecraft.custom:minecraft.jump\": 4,\n \"minecraft.custom:minecraft.time_since_rest\": 5999,\n \"minecraft.custom:minecraft.play_one_minute\": 5999,\n \"minecraft.custom:minecraft.time_since_death\": 5999,\n \"minecraft.custom:minecraft.walk_one_cm\": 7554,\n \"minecraft.use_item:minecraft.oak_planks\": 5,\n \"minecraft.custom:minecraft.fall_one_cm\": 269,\n \"minecraft.use_item:minecraft.glass_pane\": 3\n }\n}\n```\n# BASALT 2022 dataset\nWe also collected a dataset of demonstrations for the [MineRL BASALT 2022](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition) competition, with around 150GB of data per task.\n**Note**: To avoid confusion with the competition rules, the action files (.jsonl) have been stripped of information that is not allowed in the competition. We will upload unmodified dataset after the competition ends.",
+ "type": "code",
+ "location": "/README.md:389-418"
+ },
+ "43": {
+ "file_id": 0,
+ "content": "This code represents a JSON object containing game state information and player actions for Minecraft. It includes a list of items in the player's inventory, server tick data, and various statistics tracking the player's actions. The dataset is collected for the MineRL BASALT 2022 competition with around 150GB of data per task.",
+ "type": "comment"
+ },
+ "44": {
+ "file_id": 0,
+ "content": "* **FindCave** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/find-cave-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n Look around for a cave. When you are inside one, quit the game by opening main menu and pressing \"Save and Quit To Title\".\n You are not allowed to dig down from the surface to find a cave.\n Timelimit: 3 minutes.\n Example recordings: https://www.youtube.com/watch?v=TclP_ozH-eg\n ```\n \n* **MakeWaterfall** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/waterfall-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n After spawning in a mountainous area with a water bucket and various tools, build a beautiful waterfall and then reposition yourself to “take a scenic picture” of the same waterfall, and then quit the game by opening the menu and selecting \"Save and Quit to Title\"\n Timelimit: 5 minutes.",
+ "type": "code",
+ "location": "/README.md:420-439"
+ },
+ "45": {
+ "file_id": 0,
+ "content": "FindCave: Look for a cave and quit the game when inside one. No digging from surface. Timelimit: 3 minutes. Example recordings: https://www.youtube.com/watch?v=TclP_ozH-eg\n\nMakeWaterfall: Spawn in mountainous area, build waterfall, take a scenic picture, and quit the game. Timelimit: 5 minutes.",
+ "type": "comment"
+ },
+ "46": {
+ "file_id": 0,
+ "content": " Example recordings: https://youtu.be/NONcbS85NLA\n ```\n \n* **MakeVillageAnimalPen** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/pen-animals-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n After spawning in a village, build an animal pen next to one of the houses in a village. Use your fence posts to build one animal pen that contains at least two of the same animal. (You are only allowed to pen chickens, cows, pigs, sheep or rabbits.) There should be at least one gate that allows players to enter and exit easily. The animal pen should not contain more than one type of animal. (You may kill any extra types of animals that accidentally got into the pen.) Don’t harm the village.\n After you are done, quit the game by opening the menu and pressing \"Save and Quit to Title\".\n You may need to terraform the area around a house to build a pen. When we say not to harm the village, e",
+ "type": "code",
+ "location": "/README.md:440-451"
+ },
+ "47": {
+ "file_id": 0,
+ "content": "This code provides a prompt for contractors to build an animal pen next to a village house using specific animals and materials, without harming the village.",
+ "type": "comment"
+ },
+ "48": {
+ "file_id": 0,
+ "content": "xamples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers. Animal pens must have a single type of animal: pigs, cows, sheep, chicken or rabbits.\n The food items can be used to lure in the animals: if you hold seeds in your hand, this attracts nearby chickens to you, for example.\n Timelimit: 5 minutes.\n Example recordings: https://youtu.be/SLO7sep7BO8\n ```\n \n* **BuildVillageHouse** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/build-house-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n Taking advantage of the items in your inventory, build a new house in the style of the village (random biome), in an appropriate location (e.g. next to the path through the village), without harming the village in the process.\n Then give a brief tour of the house (i.e. spin around slowly such that all of the walls and the roof are visible).\n ",
+ "type": "code",
+ "location": "/README.md:451-467"
+ },
+ "49": {
+ "file_id": 0,
+ "content": "The code describes tasks for AI to complete in the game of Minecraft. Tasks include not harming animals or damaging existing structures while building a new house, and giving a brief tour of the constructed house. There is also a time limit of 5 minutes and an example recording provided for reference.",
+ "type": "comment"
+ },
+ "50": {
+ "file_id": 0,
+ "content": " * You start with a stone pickaxe and a stone axe, and various building blocks. It’s okay to break items that you misplaced (e.g. use the stone pickaxe to break cobblestone blocks).\n * You are allowed to craft new blocks.\n Please spend less than ten minutes constructing your house.\n You don’t need to copy another house in the village exactly (in fact, we’re more interested in having slight deviations, while keeping the same \"style\"). You may need to terraform the area to make space for a new house.\n When we say not to harm the village, examples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers.\n After you are done, quit the game by opening the menu and pressing \"Save and Quit to Title\".\n Timelimit: 12 minutes.\n Example recordings: https://youtu.be/WeVqQN96V_g\n ```\n \n# Contribution\nThis was a large effort by a dedicated team at OpenAI:\n[Bowen Baker](https://github.com/bowenbaker),\n[Ilge Akkaya](https://github.com/ilge),",
+ "type": "code",
+ "location": "/README.md:467-487"
+ },
+ "51": {
+ "file_id": 0,
+ "content": "Instructions for building a house in Minecraft within 10 minutes, without causing harm to the village.",
+ "type": "comment"
+ },
+ "52": {
+ "file_id": 0,
+ "content": "[Peter Zhokhov](https://github.com/pzhokhov),\n[Joost Huizinga](https://github.com/JoostHuizinga),\n[Jie Tang](https://github.com/jietang),\n[Adrien Ecoffet](https://github.com/AdrienLE),\n[Brandon Houghton](https://github.com/brandonhoughton),\n[Raul Sampedro](https://github.com/samraul),\nJeff Clune\nThe code here represents a minimal version of our model code which was\nprepared by [Anssi Kanervisto](https://github.com/miffyli) and others so that these models could be used as\npart of the MineRL BASALT competition.",
+ "type": "code",
+ "location": "/README.md:488-497"
+ },
+ "53": {
+ "file_id": 0,
+ "content": "This code is acknowledging the authors of the model and crediting Anssi Kanervisto for preparing a minimal version of the code to be used in the MineRL BASALT competition.",
+ "type": "comment"
+ },
+ "54": {
+ "file_id": 1,
+ "content": "/agent.py",
+ "type": "filepath"
+ },
+ "55": {
+ "file_id": 1,
+ "content": "The code establishes Minecraft agent settings for a reinforcement learning project, including environment configuration and an agent class with action mapping, transforming, policy-making capabilities, and device support for actions.",
+ "type": "summary"
+ },
+ "56": {
+ "file_id": 1,
+ "content": "import numpy as np\nimport torch as th\nimport cv2\nfrom gym3.types import DictType\nfrom gym import spaces\nfrom lib.action_mapping import CameraHierarchicalMapping\nfrom lib.actions import ActionTransformer\nfrom lib.policy import MinecraftAgentPolicy\nfrom lib.torch_util import default_device_type, set_default_torch_device\n# Hardcoded settings\nAGENT_RESOLUTION = (128, 128)\nPOLICY_KWARGS = dict(\n attention_heads=16,\n attention_mask_style=\"clipped_causal\",\n attention_memory_size=256,\n diff_mlp_embedding=False,\n hidsize=2048,\n img_shape=[128, 128, 3],\n impala_chans=[16, 32, 32],\n impala_kwargs={\"post_pool_groups\": 1},\n impala_width=8,\n init_norm_kwargs={\"batch_norm\": False, \"group_norm_groups\": 1},\n n_recurrence_layers=4,\n only_img_input=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n recurrence_is_residual=True,\n recurrence_type=\"transformer\",\n timesteps=128,\n use_pointwise_layer=True,\n use_pre_lstm_ln=False,\n)\nPI_HEAD_KWARGS = dict(temperature=2.0)\nACTION_TRANSFORMER_KWARGS = dict(",
+ "type": "code",
+ "location": "/agent.py:1-40"
+ },
+ "57": {
+ "file_id": 1,
+ "content": "The code imports necessary libraries and defines various settings for a Minecraft agent. It includes hardcoded resolution, policy arguments, action transformer arguments, and other configuration options. The code seems to be part of a larger project involving reinforcement learning and a specific environment (Minecraft in this case).",
+ "type": "comment"
+ },
+ "58": {
+ "file_id": 1,
+ "content": " camera_binsize=2,\n camera_maxval=10,\n camera_mu=10,\n camera_quantization_scheme=\"mu_law\",\n)\nENV_KWARGS = dict(\n fov_range=[70, 70],\n frameskip=1,\n gamma_range=[2, 2],\n guiscale_range=[1, 1],\n resolution=[640, 360],\n cursor_size_range=[16.0, 16.0],\n)\nTARGET_ACTION_SPACE = {\n \"ESC\": spaces.Discrete(2),\n \"attack\": spaces.Discrete(2),\n \"back\": spaces.Discrete(2),\n \"camera\": spaces.Box(low=-180.0, high=180.0, shape=(2,)),\n \"drop\": spaces.Discrete(2),\n \"forward\": spaces.Discrete(2),\n \"hotbar.1\": spaces.Discrete(2),\n \"hotbar.2\": spaces.Discrete(2),\n \"hotbar.3\": spaces.Discrete(2),\n \"hotbar.4\": spaces.Discrete(2),\n \"hotbar.5\": spaces.Discrete(2),\n \"hotbar.6\": spaces.Discrete(2),\n \"hotbar.7\": spaces.Discrete(2),\n \"hotbar.8\": spaces.Discrete(2),\n \"hotbar.9\": spaces.Discrete(2),\n \"inventory\": spaces.Discrete(2),\n \"jump\": spaces.Discrete(2),\n \"left\": spaces.Discrete(2),\n \"pickItem\": spaces.Discrete(2),\n \"right\": spaces.Discrete(2),\n \"sneak\": spaces.Discrete(2),",
+ "type": "code",
+ "location": "/agent.py:41-77"
+ },
+ "59": {
+ "file_id": 1,
+ "content": "This code sets various parameters for a game agent. It defines action space, environment keywords, and camera settings for the game environment. The action space includes actions like ESC, attack, back, camera adjustment, dropping items, moving, using hotbar slots, inventory management, jumping, moving left/right, and sneaking. Environment keywords specify fields such as field of view (fov_range), frameskip, gamma value range, gui scale range, game resolution, and cursor size range. Camera settings include bin size, maximum value, mean value, and quantization scheme for the camera input.",
+ "type": "comment"
+ },
+ "60": {
+ "file_id": 1,
+ "content": " \"sprint\": spaces.Discrete(2),\n \"swapHands\": spaces.Discrete(2),\n \"use\": spaces.Discrete(2)\n}\ndef validate_env(env):\n \"\"\"Check that the MineRL environment is setup correctly, and raise if not\"\"\"\n for key, value in ENV_KWARGS.items():\n if key == \"frameskip\":\n continue\n if getattr(env.task, key) != value:\n raise ValueError(f\"MineRL environment setting {key} does not match {value}\")\n action_names = set(env.action_space.spaces.keys())\n if action_names != set(TARGET_ACTION_SPACE.keys()):\n raise ValueError(f\"MineRL action space does match. Expected actions {set(TARGET_ACTION_SPACE.keys())}\")\n for ac_space_name, ac_space_space in TARGET_ACTION_SPACE.items():\n if env.action_space.spaces[ac_space_name] != ac_space_space:\n raise ValueError(f\"MineRL action space setting {ac_space_name} does not match {ac_space_space}\")\ndef resize_image(img, target_resolution):\n # For your sanity, do not resize with any function than INTER_LINEAR",
+ "type": "code",
+ "location": "/agent.py:78-101"
+ },
+ "61": {
+ "file_id": 1,
+ "content": "This code defines a dictionary ENV_KWARGS that contains expected environment settings for the MineRL environment. It also includes a function validate_env() to check if the environment is set up correctly and raise an error if any setting does not match the expected value. Additionally, it checks if the action space of the MineRL environment matches the expected actions and their respective spaces. The code also provides a comment suggesting to resize images using INTER_LINEAR resizing function for better results.",
+ "type": "comment"
+ },
+ "62": {
+ "file_id": 1,
+ "content": " img = cv2.resize(img, target_resolution, interpolation=cv2.INTER_LINEAR)\n return img\nclass MineRLAgent:\n def __init__(self, env, device=None, policy_kwargs=None, pi_head_kwargs=None):\n validate_env(env)\n if device is None:\n device = default_device_type()\n self.device = th.device(device)\n # Set the default torch device for underlying code as well\n set_default_torch_device(self.device)\n self.action_mapper = CameraHierarchicalMapping(n_camera_bins=11)\n action_space = self.action_mapper.get_action_space_update()\n action_space = DictType(**action_space)\n self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)\n if policy_kwargs is None:\n policy_kwargs = POLICY_KWARGS\n if pi_head_kwargs is None:\n pi_head_kwargs = PI_HEAD_KWARGS\n agent_kwargs = dict(policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)\n self.policy = MinecraftAgentPolicy(**agent_kwargs).to(device)",
+ "type": "code",
+ "location": "/agent.py:102-128"
+ },
+ "63": {
+ "file_id": 1,
+ "content": "This code defines a MineRLAgent class with an __init__ method. It resizes the image using cv2.resize and returns it. The class has attributes for action_mapper, action_transformer, policy, and device. The policy is created with given policy_kwargs, pi_head_kwargs, and action_space. The device is set as the default torch device.",
+ "type": "comment"
+ },
+ "64": {
+ "file_id": 1,
+ "content": " self.hidden_state = self.policy.initial_state(1)\n self._dummy_first = th.from_numpy(np.array((False,))).to(device)\n def load_weights(self, path):\n \"\"\"Load model weights from a path, and reset hidden state\"\"\"\n self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)\n self.reset()\n def reset(self):\n \"\"\"Reset agent to initial state (i.e., reset hidden state)\"\"\"\n self.hidden_state = self.policy.initial_state(1)\n def _env_obs_to_agent(self, minerl_obs):\n \"\"\"\n Turn observation from MineRL environment into model's observation\n Returns torch tensors.\n \"\"\"\n agent_input = resize_image(minerl_obs[\"pov\"], AGENT_RESOLUTION)[None]\n agent_input = {\"img\": th.from_numpy(agent_input).to(self.device)}\n return agent_input\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # This is quite important step (for some reason).\n # For the sake of your sanity, remember to do this step (manual conversion to numpy)",
+ "type": "code",
+ "location": "/agent.py:129-154"
+ },
+ "65": {
+ "file_id": 1,
+ "content": "The code defines a class for an agent with methods to reset its hidden state, convert MineRL observations into the model's observation format, and convert policy output into actions for the MineRL environment. It also includes a method to load model weights from a given path while resetting the hidden state.",
+ "type": "comment"
+ },
+ "66": {
+ "file_id": 1,
+ "content": " # before proceeding. Otherwise, your agent might be a little derp.\n action = agent_action\n if isinstance(action[\"buttons\"], th.Tensor):\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n minerl_action = self.action_mapper.to_factored(action)\n minerl_action_transformed = self.action_transformer.policy2env(minerl_action)\n return minerl_action_transformed\n def _env_action_to_agent(self, minerl_action_transformed, to_torch=False, check_if_null=False):\n \"\"\"\n Turn action from MineRL to model's action.\n Note that this will add batch dimensions to the action.\n Returns numpy arrays, unless `to_torch` is True, in which case it returns torch tensors.\n If `check_if_null` is True, check if the action is null (no action) after the initial\n transformation. This matches the behaviour done in OpenAI's VPT work.\n If action is null, return \"None\" instead",
+ "type": "code",
+ "location": "/agent.py:155-175"
+ },
+ "67": {
+ "file_id": 1,
+ "content": "This code snippet is part of a function that transforms actions from MineRL format to the model's action format and vice versa. It also handles the conversion between PyTorch tensors and numpy arrays, and checks if the action is null (no action).",
+ "type": "comment"
+ },
+ "68": {
+ "file_id": 1,
+ "content": " \"\"\"\n minerl_action = self.action_transformer.env2policy(minerl_action_transformed)\n if check_if_null:\n if np.all(minerl_action[\"buttons\"] == 0) and np.all(minerl_action[\"camera\"] == self.action_transformer.camera_zero_bin):\n return None\n # Add batch dims if not existant\n if minerl_action[\"camera\"].ndim == 1:\n minerl_action = {k: v[None] for k, v in minerl_action.items()}\n action = self.action_mapper.from_factored(minerl_action)\n if to_torch:\n action = {k: th.from_numpy(v).to(self.device) for k, v in action.items()}\n return action\n def get_action(self, minerl_obs):\n \"\"\"\n Get agent's action for given MineRL observation.\n Agent's hidden state is tracked internally. To reset it,\n call `reset()`.\n \"\"\"\n agent_input = self._env_obs_to_agent(minerl_obs)\n # The \"first\" argument could be used to reset tell episode\n # boundaries, but we are only using this for predicting (for now),",
+ "type": "code",
+ "location": "/agent.py:176-199"
+ },
+ "69": {
+ "file_id": 1,
+ "content": "This code defines a class that takes MineRL observations as input and outputs the corresponding action. It includes methods for transforming actions, mapping actions, and getting an agent's action for a given observation. The action is returned with batch dimensions if necessary, and can be converted to PyTorch tensors if needed.",
+ "type": "comment"
+ },
+ "70": {
+ "file_id": 1,
+ "content": " # so we do not hassle with it yet.\n agent_action, self.hidden_state, _ = self.policy.act(\n agent_input, self._dummy_first, self.hidden_state,\n stochastic=True\n )\n minerl_action = self._agent_action_to_env(agent_action)\n return minerl_action",
+ "type": "code",
+ "location": "/agent.py:200-206"
+ },
+ "71": {
+ "file_id": 1,
+ "content": "This code selects an action from the agent's policy and returns it after converting to environment format.",
+ "type": "comment"
+ },
+ "72": {
+ "file_id": 2,
+ "content": "/behavioural_cloning.py",
+ "type": "filepath"
+ },
+ "73": {
+ "file_id": 2,
+ "content": "The code imports necessary libraries, defines parameters, and creates an agent object for policy-based actor-critic model training in a behavioral cloning task. It trains the model using batches of data, updates weights, and reports average loss at specified intervals.",
+ "type": "summary"
+ },
+ "74": {
+ "file_id": 2,
+ "content": "# Basic behavioural cloning\n# Note: this uses gradient accumulation in batches of ones\n# to perform training.\n# This will fit inside even smaller GPUs (tested on 8GB one),\n# but is slow.\n# NOTE: This is _not_ the original code used for VPT!\n# This is merely to illustrate how to fine-tune the models and includes\n# the processing steps used.\n# This will likely be much worse than what original VPT did:\n# we are not training on full sequences, but only one step at a time to save VRAM.\nfrom argparse import ArgumentParser\nimport pickle\nimport time\nimport gym\nimport minerl\nimport torch as th\nimport numpy as np\nfrom agent import PI_HEAD_KWARGS, MineRLAgent\nfrom data_loader import DataLoader\nfrom lib.tree_util import tree_map\nEPOCHS = 2\n# Needs to be <= number of videos\nBATCH_SIZE = 8\n# Ideally more than batch size to create\n# variation in datasets (otherwise, you will\n# get a bunch of consecutive samples)\n# Decrease this (and batch_size) if you run out of memory\nN_WORKERS = 12\nDEVICE = \"cuda\"",
+ "type": "code",
+ "location": "/behavioural_cloning.py:1-34"
+ },
+ "75": {
+ "file_id": 2,
+ "content": "This code imports necessary libraries and defines constants for basic behavioral cloning using gradient accumulation. It uses a smaller GPU, and it's not the original code used for VPT but serves to illustrate fine-tuning models with specific processing steps. The code specifies the number of epochs, batch size, number of workers, and device for training.",
+ "type": "comment"
+ },
+ "76": {
+ "file_id": 2,
+ "content": "LOSS_REPORT_RATE = 100\nLEARNING_RATE = 0.000181\nWEIGHT_DECAY = 0.039428\nMAX_GRAD_NORM = 5.0\ndef load_model_parameters(path_to_model_file):\n agent_parameters = pickle.load(open(path_to_model_file, \"rb\"))\n policy_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n return policy_kwargs, pi_head_kwargs\ndef behavioural_cloning_train(data_dir, in_model, in_weights, out_weights):\n agent_policy_kwargs, agent_pi_head_kwargs = load_model_parameters(in_model)\n # To create model with the right environment.\n # All basalt environments have the same settings, so any of them works here\n env = gym.make(\"MineRLBasaltFindCave-v0\")\n agent = MineRLAgent(env, device=DEVICE, policy_kwargs=agent_policy_kwargs, pi_head_kwargs=agent_pi_head_kwargs)\n agent.load_weights(in_weights)\n env.close()\n policy = agent.policy\n trainable_parameters = policy.parameters()",
+ "type": "code",
+ "location": "/behavioural_cloning.py:36-60"
+ },
+ "77": {
+ "file_id": 2,
+ "content": "Load model parameters from file, define environment settings, and create agent object with defined policy and head parameters.",
+ "type": "comment"
+ },
+ "78": {
+ "file_id": 2,
+ "content": " # Parameters taken from the OpenAI VPT paper\n optimizer = th.optim.Adam(\n trainable_parameters,\n lr=LEARNING_RATE,\n weight_decay=WEIGHT_DECAY\n )\n data_loader = DataLoader(\n dataset_dir=data_dir,\n n_workers=N_WORKERS,\n batch_size=BATCH_SIZE,\n n_epochs=EPOCHS\n )\n start_time = time.time()\n # Keep track of the hidden state per episode/trajectory.\n # DataLoader provides unique id for each episode, which will\n # be different even for the same trajectory when it is loaded\n # up again\n episode_hidden_states = {}\n dummy_first = th.from_numpy(np.array((False,))).to(DEVICE)\n loss_sum = 0\n for batch_i, (batch_images, batch_actions, batch_episode_id) in enumerate(data_loader):\n batch_loss = 0\n for image, action, episode_id in zip(batch_images, batch_actions, batch_episode_id):\n agent_action = agent._env_action_to_agent(action, to_torch=True, check_if_null=True)\n if agent_action is None:\n # Action was null",
+ "type": "code",
+ "location": "/behavioural_cloning.py:62-91"
+ },
+ "79": {
+ "file_id": 2,
+ "content": "Setting up optimizer, data loader, and initializing variables for training.",
+ "type": "comment"
+ },
+ "80": {
+ "file_id": 2,
+ "content": " continue\n agent_obs = agent._env_obs_to_agent({\"pov\": image})\n if episode_id not in episode_hidden_states:\n # TODO need to clean up this hidden state after worker is done with the work item.\n # Leaks memory, but not tooooo much at these scales (will be a problem later).\n episode_hidden_states[episode_id] = policy.initial_state(1)\n agent_state = episode_hidden_states[episode_id]\n pi_distribution, v_prediction, new_agent_state = policy.get_output_for_observation(\n agent_obs,\n agent_state,\n dummy_first\n )\n log_prob = policy.get_logprob_of_action(pi_distribution, agent_action)\n # Make sure we do not try to backprop through sequence\n # (fails with current accumulation)\n new_agent_state = tree_map(lambda x: x.detach(), new_agent_state)\n episode_hidden_states[episode_id] = new_agent_state\n # Finally, update the agent to increase the probability of the",
+ "type": "code",
+ "location": "/behavioural_cloning.py:92-114"
+ },
+ "81": {
+ "file_id": 2,
+ "content": "The code is setting up the environment for a policy-based actor-critic model in a behavioral cloning task. It assigns the hidden state for the episode, gets the output for the observation, calculates the log probability of the action, and updates the agent's state.",
+ "type": "comment"
+ },
+ "82": {
+ "file_id": 2,
+ "content": " # taken action.\n # Remember to take mean over batch losses\n loss = -log_prob / BATCH_SIZE\n batch_loss += loss.item()\n loss.backward()\n th.nn.utils.clip_grad_norm_(trainable_parameters, MAX_GRAD_NORM)\n optimizer.step()\n optimizer.zero_grad()\n loss_sum += batch_loss\n if batch_i % LOSS_REPORT_RATE == 0:\n time_since_start = time.time() - start_time\n print(f\"Time: {time_since_start:.2f}, Batches: {batch_i}, Avrg loss: {loss_sum / LOSS_REPORT_RATE:.4f}\")\n loss_sum = 0\n state_dict = policy.state_dict()\n th.save(state_dict, out_weights)\nif __name__ == \"__main__\":\n parser = ArgumentParser()\n parser.add_argument(\"--data-dir\", type=str, required=True, help=\"Path to the directory containing recordings to be trained on\")\n parser.add_argument(\"--in-model\", required=True, type=str, help=\"Path to the .model file to be finetuned\")\n parser.add_argument(\"--in-weights\", required=True, type=str, help=\"Path to the .weights file to be finetuned\")",
+ "type": "code",
+ "location": "/behavioural_cloning.py:115-139"
+ },
+ "83": {
+ "file_id": 2,
+ "content": "The code is training a policy model using behavioral cloning on batches of data. It calculates the batch loss, applies gradients and updates weights, saves state dictionary to a specified output file, and reports the average loss every LOSS_REPORT_RATE batches. The inputs are the path to the directory containing recordings for training, the path to the model file to be fine-tuned, and the path to the weights file to be fine-tuned.",
+ "type": "comment"
+ },
+ "84": {
+ "file_id": 2,
+ "content": " parser.add_argument(\"--out-weights\", required=True, type=str, help=\"Path where finetuned weights will be saved\")\n args = parser.parse_args()\n behavioural_cloning_train(args.data_dir, args.in_model, args.in_weights, args.out_weights)",
+ "type": "code",
+ "location": "/behavioural_cloning.py:140-143"
+ },
+ "85": {
+ "file_id": 2,
+ "content": "The code adds an argument for the output weights path and parses the command line arguments, then calls the behavioral cloning training function.",
+ "type": "comment"
+ },
+ "86": {
+ "file_id": 3,
+ "content": "/data_loader.py",
+ "type": "filepath"
+ },
+ "87": {
+ "file_id": 3,
+ "content": "The code imports libraries, initializes a data loader class for simpler code, lacks sub-sequence support, and processes data for a batch of samples with workers outputting all samples to the same batch. The `__del__` method terminates and joins processes when object is deleted.",
+ "type": "summary"
+ },
+ "88": {
+ "file_id": 3,
+ "content": "# Code for loading OpenAI MineRL VPT datasets\n# NOTE: This is NOT original code used for the VPT experiments!\n# (But contains all [or at least most] steps done in the original data loading)\nimport json\nimport glob\nimport os\nimport random\nfrom multiprocessing import Process, Queue, Event\nimport numpy as np\nimport cv2\nfrom run_inverse_dynamics_model import json_action_to_env_action\nfrom agent import resize_image, AGENT_RESOLUTION\nQUEUE_TIMEOUT = 10\nCURSOR_FILE = os.path.join(os.path.dirname(__file__), \"cursors\", \"mouse_cursor_white_16x16.png\")\nMINEREC_ORIGINAL_HEIGHT_PX = 720\n# If GUI is open, mouse dx/dy need also be adjusted with these scalers.\n# If data version is not present, assume it is 1.\nMINEREC_VERSION_SPECIFIC_SCALERS = {\n \"5.7\": 0.5,\n \"5.8\": 0.5,\n \"6.7\": 2.0,\n \"6.8\": 2.0,\n \"6.9\": 2.0,\n}\ndef composite_images_with_alpha(image1, image2, alpha, x, y):\n \"\"\"\n Draw image2 over image1 at location x,y, using alpha as the opacity for image2.\n Modifies image1 in-place\n \"\"\"\n ch = max(0, min(image1.shape[0] - y, image2.shape[0]))",
+ "type": "code",
+ "location": "/data_loader.py:1-40"
+ },
+ "89": {
+ "file_id": 3,
+ "content": "This code imports necessary libraries and defines functions for loading OpenAI MineRL VPT datasets, adjusting cursor position based on version-specific scalers, and compositing images with alpha transparency.",
+ "type": "comment"
+ },
+ "90": {
+ "file_id": 3,
+ "content": " cw = max(0, min(image1.shape[1] - x, image2.shape[1]))\n if ch == 0 or cw == 0:\n return\n alpha = alpha[:ch, :cw]\n image1[y:y + ch, x:x + cw, :] = (image1[y:y + ch, x:x + cw, :] * (1 - alpha) + image2[:ch, :cw, :] * alpha).astype(np.uint8)\ndef data_loader_worker(tasks_queue, output_queue, quit_workers_event):\n \"\"\"\n Worker for the data loader.\n \"\"\"\n cursor_image = cv2.imread(CURSOR_FILE, cv2.IMREAD_UNCHANGED)\n # Assume 16x16\n cursor_image = cursor_image[:16, :16, :]\n cursor_alpha = cursor_image[:, :, 3:] / 255.0\n cursor_image = cursor_image[:, :, :3]\n while True:\n task = tasks_queue.get()\n if task is None:\n break\n trajectory_id, video_path, json_path = task\n video = cv2.VideoCapture(video_path)\n # NOTE: In some recordings, the game seems to start\n # with attack always down from the beginning, which\n # is stuck down until player actually presses attack\n # NOTE: It is uncertain if this was the issue with the original code.",
+ "type": "code",
+ "location": "/data_loader.py:41-67"
+ },
+ "91": {
+ "file_id": 3,
+ "content": "The code reads a video and its corresponding JSON file to extract frames and annotations for each frame. It initializes a cursor image and alpha channel, then continuously processes tasks from the tasks queue. If a task is None, it breaks the loop. The code checks if the video contains the game starting with attack always down by noting that it might be stuck down until the player presses attack.",
+ "type": "comment"
+ },
+ "92": {
+ "file_id": 3,
+ "content": " attack_is_stuck = False\n # Scrollwheel is allowed way to change items, but this is\n # not captured by the recorder.\n # Work around this by keeping track of selected hotbar item\n # and updating \"hotbar.#\" actions when hotbar selection changes.\n # NOTE: It is uncertain is this was/is an issue with the contractor data\n last_hotbar = 0\n with open(json_path) as json_file:\n json_lines = json_file.readlines()\n json_data = \"[\" + \",\".join(json_lines) + \"]\"\n json_data = json.loads(json_data)\n for i in range(len(json_data)):\n if quit_workers_event.is_set():\n break\n step_data = json_data[i]\n if i == 0:\n # Check if attack will be stuck down\n if step_data[\"mouse\"][\"newButtons\"] == [0]:\n attack_is_stuck = True\n elif attack_is_stuck:\n # Check if we press attack down, then it might not be stuck\n if 0 in step_data[\"mouse\"][\"newButtons\"]:",
+ "type": "code",
+ "location": "/data_loader.py:68-91"
+ },
+ "93": {
+ "file_id": 3,
+ "content": "Checking if attack is stuck by monitoring scrollwheel actions and updating \"hotbar.#\" actions when hotbar selection changes.",
+ "type": "comment"
+ },
+ "94": {
+ "file_id": 3,
+ "content": " attack_is_stuck = False\n # If still stuck, remove the action\n if attack_is_stuck:\n step_data[\"mouse\"][\"buttons\"] = [button for button in step_data[\"mouse\"][\"buttons\"] if button != 0]\n action, is_null_action = json_action_to_env_action(step_data)\n # Update hotbar selection\n current_hotbar = step_data[\"hotbar\"]\n if current_hotbar != last_hotbar:\n action[\"hotbar.{}\".format(current_hotbar + 1)] = 1\n last_hotbar = current_hotbar\n # Read frame even if this is null so we progress forward\n ret, frame = video.read()\n if ret:\n # Skip null actions as done in the VPT paper\n # NOTE: in VPT paper, this was checked _after_ transforming into agent's action-space.\n # We do this here as well to reduce amount of data sent over.\n if is_null_action:\n continue\n if step_data[\"isGuiOpen\"]:",
+ "type": "code",
+ "location": "/data_loader.py:92-113"
+ },
+ "95": {
+ "file_id": 3,
+ "content": "Checking for stuck state and removing action, updating hotbar selection, reading frame even if null to progress forward.",
+ "type": "comment"
+ },
+ "96": {
+ "file_id": 3,
+ "content": " camera_scaling_factor = frame.shape[0] / MINEREC_ORIGINAL_HEIGHT_PX\n cursor_x = int(step_data[\"mouse\"][\"x\"] * camera_scaling_factor)\n cursor_y = int(step_data[\"mouse\"][\"y\"] * camera_scaling_factor)\n composite_images_with_alpha(frame, cursor_image, cursor_alpha, cursor_x, cursor_y)\n cv2.cvtColor(frame, code=cv2.COLOR_BGR2RGB, dst=frame)\n frame = np.asarray(np.clip(frame, 0, 255), dtype=np.uint8)\n frame = resize_image(frame, AGENT_RESOLUTION)\n output_queue.put((trajectory_id, frame, action), timeout=QUEUE_TIMEOUT)\n else:\n print(f\"Could not read frame from video {video_path}\")\n video.release()\n if quit_workers_event.is_set():\n break\n # Tell that we ended\n output_queue.put(None)\nclass DataLoader:\n \"\"\"\n Generator class for loading batches from a dataset\n This only returns a single step at a time per worker; no sub-sequences.",
+ "type": "code",
+ "location": "/data_loader.py:114-134"
+ },
+ "97": {
+ "file_id": 3,
+ "content": "Applies camera scaling factor to mouse coordinates, composes cursor image with frame, converts image color, clips and resizes the frame, then puts (trajectory_id, frame, action) in output queue. If frame cannot be read, prints an error message. Finally, releases video and checks quit_workers_event before putting None in output queue to signal end of data loading.",
+ "type": "comment"
+ },
+ "98": {
+ "file_id": 3,
+ "content": " Idea is that you keep track of the model's hidden state and feed that in,\n along with one sample at a time.\n + Simpler loader code\n + Supports lower end hardware\n - Not very efficient (could be faster)\n - No support for sub-sequences\n - Loads up individual files as trajectory files (i.e. if a trajectory is split into multiple files,\n this code will load it up as a separate item).\n \"\"\"\n def __init__(self, dataset_dir, n_workers=8, batch_size=8, n_epochs=1, max_queue_size=16):\n assert n_workers >= batch_size, \"Number of workers must be equal or greater than batch size\"\n self.dataset_dir = dataset_dir\n self.n_workers = n_workers\n self.n_epochs = n_epochs\n self.batch_size = batch_size\n self.max_queue_size = max_queue_size\n unique_ids = glob.glob(os.path.join(dataset_dir, \"*.mp4\"))\n unique_ids = list(set([os.path.basename(x).split(\".\")[0] for x in unique_ids]))\n self.unique_ids = unique_ids\n # Create tuples of (video_path, json_path) for each unique_id",
+ "type": "code",
+ "location": "/data_loader.py:135-155"
+ },
+ "99": {
+ "file_id": 3,
+ "content": "This code initializes a data loader class that tracks the model's hidden state and feeds it along with one sample at a time. It supports simpler loader code, lower end hardware, but is not very efficient and lacks support for sub-sequences. The loader loads individual files as trajectory files if they are split into multiple files.",
+ "type": "comment"
+ }
+}
\ No newline at end of file
diff --git a/docs/data/1.json b/docs/data/1.json
new file mode 100644
index 0000000..1fe833a
--- /dev/null
+++ b/docs/data/1.json
@@ -0,0 +1,546 @@
+{
+ "100": {
+ "file_id": 3,
+ "content": " demonstration_tuples = []\n for unique_id in unique_ids:\n video_path = os.path.abspath(os.path.join(dataset_dir, unique_id + \".mp4\"))\n json_path = os.path.abspath(os.path.join(dataset_dir, unique_id + \".jsonl\"))\n demonstration_tuples.append((video_path, json_path))\n assert n_workers <= len(demonstration_tuples), f\"n_workers should be lower or equal than number of demonstrations {len(demonstration_tuples)}\"\n # Repeat dataset for n_epochs times, shuffling the order for\n # each epoch\n self.demonstration_tuples = []\n for i in range(n_epochs):\n random.shuffle(demonstration_tuples)\n self.demonstration_tuples += demonstration_tuples\n self.task_queue = Queue()\n self.n_steps_processed = 0\n for trajectory_id, task in enumerate(self.demonstration_tuples):\n self.task_queue.put((trajectory_id, *task))\n for _ in range(n_workers):\n self.task_queue.put(None)\n self.output_queues = [Queue(maxsize=max_queue_size) for _ in range(n_workers)]",
+ "type": "code",
+ "location": "/data_loader.py:156-178"
+ },
+ "101": {
+ "file_id": 3,
+ "content": "This code is creating a data loader for video demonstrations. It collects the video and JSONL file paths for each unique ID, shuffles them for each epoch, and adds them to the task queue. It also creates output queues for worker threads.",
+ "type": "comment"
+ },
+ "102": {
+ "file_id": 3,
+ "content": " self.quit_workers_event = Event()\n self.processes = [\n Process(\n target=data_loader_worker,\n args=(\n self.task_queue,\n output_queue,\n self.quit_workers_event,\n ),\n daemon=True\n )\n for output_queue in self.output_queues\n ]\n for process in self.processes:\n process.start()\n def __iter__(self):\n return self\n def __next__(self):\n batch_frames = []\n batch_actions = []\n batch_episode_id = []\n for i in range(self.batch_size):\n workitem = self.output_queues[self.n_steps_processed % self.n_workers].get(timeout=QUEUE_TIMEOUT)\n if workitem is None:\n # Stop iteration when first worker runs out of work to do.\n # Yes, this has a chance of cutting out a lot of the work,\n # but this ensures batches will remain diverse, instead\n # of having bad ones in the end where potentially",
+ "type": "code",
+ "location": "/data_loader.py:179-209"
+ },
+ "103": {
+ "file_id": 3,
+ "content": "This code sets up data loading workers as separate processes, and then starts them. The iterator function retrieves batch frames, actions, and episode IDs from the output queues of these worker processes until one of the workers runs out of work.",
+ "type": "comment"
+ },
+ "104": {
+ "file_id": 3,
+ "content": " # one worker outputs all samples to the same batch.\n raise StopIteration()\n trajectory_id, frame, action = workitem\n batch_frames.append(frame)\n batch_actions.append(action)\n batch_episode_id.append(trajectory_id)\n self.n_steps_processed += 1\n return batch_frames, batch_actions, batch_episode_id\n def __del__(self):\n for process in self.processes:\n process.terminate()\n process.join()",
+ "type": "code",
+ "location": "/data_loader.py:210-222"
+ },
+ "105": {
+ "file_id": 3,
+ "content": "This code is processing data for a batch of samples, where each worker outputs all samples to the same batch. It appends frames, actions, and episode IDs to their respective lists before returning them as a batch. The `__del__` method ensures all processes are terminated and joined when the object is deleted.",
+ "type": "comment"
+ },
+ "106": {
+ "file_id": 4,
+ "content": "/inverse_dynamics_model.py",
+ "type": "filepath"
+ },
+ "107": {
+ "file_id": 4,
+ "content": "IDMAgent is a Minecraft action predictor using the IDM model, featuring functions for initializing, loading weights, resetting state, and processing video frames. It converts policy output to MineRL format for agent state prediction.",
+ "type": "summary"
+ },
+ "108": {
+ "file_id": 4,
+ "content": "import numpy as np\nimport torch as th\nimport cv2\nfrom gym3.types import DictType\nfrom gym import spaces\nfrom lib.action_mapping import CameraHierarchicalMapping, IDMActionMapping\nfrom lib.actions import ActionTransformer\nfrom lib.policy import InverseActionPolicy\nfrom lib.torch_util import default_device_type, set_default_torch_device\nfrom agent import resize_image, AGENT_RESOLUTION\nACTION_TRANSFORMER_KWARGS = dict(\n camera_binsize=2,\n camera_maxval=10,\n camera_mu=10,\n camera_quantization_scheme=\"mu_law\",\n)\nclass IDMAgent:\n \"\"\"\n Sugarcoating on the inverse dynamics model (IDM) used to predict actions Minecraft players take in videos.\n Functionally same as MineRLAgent.\n \"\"\"\n def __init__(self, idm_net_kwargs, pi_head_kwargs, device=None):\n if device is None:\n device = default_device_type()\n self.device = th.device(device)\n # Set the default torch device for underlying code as well\n set_default_torch_device(self.device)\n self.action_mapper = IDMActionMapping(n_camera_bins=11)",
+ "type": "code",
+ "location": "/inverse_dynamics_model.py:1-33"
+ },
+ "109": {
+ "file_id": 4,
+ "content": "IDMAgent is a class representing an agent that uses the inverse dynamics model (IDM) to predict Minecraft player actions in videos. It has an action mapper and is initialized with idm_net_kwargs, pi_head_kwargs, and device (default device type if None).",
+ "type": "comment"
+ },
+ "110": {
+ "file_id": 4,
+ "content": " action_space = self.action_mapper.get_action_space_update()\n action_space = DictType(**action_space)\n self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)\n idm_policy_kwargs = dict(idm_net_kwargs=idm_net_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)\n self.policy = InverseActionPolicy(**idm_policy_kwargs).to(device)\n self.hidden_state = self.policy.initial_state(1)\n self._dummy_first = th.from_numpy(np.array((False,))).to(device)\n def load_weights(self, path):\n \"\"\"Load model weights from a path, and reset hidden state\"\"\"\n self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)\n self.reset()\n def reset(self):\n \"\"\"Reset agent to initial state (i.e., reset hidden state)\"\"\"\n self.hidden_state = self.policy.initial_state(1)\n def _video_obs_to_agent(self, video_frames):\n imgs = [resize_image(frame, AGENT_RESOLUTION) for frame in video_frames]\n # Add time and batch dim",
+ "type": "code",
+ "location": "/inverse_dynamics_model.py:34-56"
+ },
+ "111": {
+ "file_id": 4,
+ "content": "Function: __init__\n- Initializes the agent with specified parameters and loads initial weights.\n\nFunction: load_weights\n- Loads model weights from a path and resets the hidden state of the agent.\n\nFunction: reset\n- Resets the agent to its initial state by setting the hidden state to the result of the policy's initial_state method with an argument of 1.\n\nFunction:_video_obs_to_agent\n- Takes a list of video frames, resizes them to AGENT_RESOLUTION, and returns the processed images for the agent to use.",
+ "type": "comment"
+ },
+ "112": {
+ "file_id": 4,
+ "content": " imgs = np.stack(imgs)[None]\n agent_input = {\"img\": th.from_numpy(imgs).to(self.device)}\n return agent_input\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # This is quite important step (for some reason).\n # For the sake of your sanity, remember to do this step (manual conversion to numpy)\n # before proceeding. Otherwise, your agent might be a little derp.\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n minerl_action = self.action_mapper.to_factored(action)\n minerl_action_transformed = self.action_transformer.policy2env(minerl_action)\n return minerl_action_transformed\n def predict_actions(self, video_frames):\n \"\"\"\n Predict actions for a sequence of frames.\n `video_frames` should be of shape (N, H, W, C).\n Returns MineRL action dict, where each action head",
+ "type": "code",
+ "location": "/inverse_dynamics_model.py:57-79"
+ },
+ "113": {
+ "file_id": 4,
+ "content": "Code snippet:\n```python\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # Manual conversion to numpy is important.\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n```\nComment: Converts policy output to MineRL action format using manual numpy conversion",
+ "type": "comment"
+ },
+ "114": {
+ "file_id": 4,
+ "content": " has shape (N, ...).\n Agent's hidden state is tracked internally. To reset it,\n call `reset()`.\n \"\"\"\n agent_input = self._video_obs_to_agent(video_frames)\n # The \"first\" argument could be used to reset tell episode\n # boundaries, but we are only using this for predicting (for now),\n # so we do not hassle with it yet.\n dummy_first = th.zeros((video_frames.shape[0], 1)).to(self.device)\n predicted_actions, self.hidden_state, _ = self.policy.predict(\n agent_input, first=dummy_first, state_in=self.hidden_state,\n deterministic=True\n )\n predicted_minerl_action = self._agent_action_to_env(predicted_actions)\n return predicted_minerl_action",
+ "type": "code",
+ "location": "/inverse_dynamics_model.py:80-95"
+ },
+ "115": {
+ "file_id": 4,
+ "content": "This function takes video frames as input, converts them to agent input, and uses the policy model to predict actions. It also maintains an internal hidden state for tracking the agent's state and can be reset using `reset()`.",
+ "type": "comment"
+ },
+ "116": {
+ "file_id": 5,
+ "content": "/lib/action_head.py",
+ "type": "filepath"
+ },
+ "117": {
+ "file_id": 5,
+ "content": "The code introduces an `ActionHead` abstract base class for reinforcement learning action heads, including methods such as logprob, sample, entropy, and kl_divergence. It supports Discrete, Real, and DictType action spaces and has reset parameters and forward pass functionality.",
+ "type": "summary"
+ },
+ "118": {
+ "file_id": 5,
+ "content": "import logging\nfrom typing import Any, Tuple\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.nn.init as init\nfrom gym3.types import DictType, Discrete, Real, TensorType, ValType\nLOG0 = -100\ndef fan_in_linear(module: nn.Module, scale=1.0, bias=True):\n \"\"\"Fan-in init\"\"\"\n module.weight.data *= scale / module.weight.norm(dim=1, p=2, keepdim=True)\n if bias:\n module.bias.data *= 0\nclass ActionHead(nn.Module):\n \"\"\"Abstract base class for action heads compatible with forc\"\"\"\n def forward(self, input_data: torch.Tensor) -> Any:\n \"\"\"\n Just a forward pass through this head\n :returns pd_params - parameters describing the probability distribution\n \"\"\"\n raise NotImplementedError\n def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"Logartithm of probability of sampling `action_sample` from a probability described by `pd_params`\"\"\"\n raise NotImplementedError\n def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:",
+ "type": "code",
+ "location": "/lib/action_head.py:1-36"
+ },
+ "119": {
+ "file_id": 5,
+ "content": "This code defines an ActionHead class and a fan_in_linear function. ActionHead is an abstract base class for action heads, which are used in reinforcement learning to determine the optimal actions. The fan_in_linear function initializes the weights of the linear layer using the Fan-in initialization method.",
+ "type": "comment"
+ },
+ "120": {
+ "file_id": 5,
+ "content": " \"\"\"Entropy of this distribution\"\"\"\n raise NotImplementedError\n def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> Any:\n \"\"\"\n Draw a sample from probability distribution given by those params\n :param pd_params Parameters of a probability distribution\n :param deterministic Whether to return a stochastic sample or deterministic mode of a distribution\n \"\"\"\n raise NotImplementedError\n def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:\n \"\"\"KL divergence between two distribution described by these two params\"\"\"\n raise NotImplementedError\nclass DiagGaussianActionHead(ActionHead):\n \"\"\"\n Action head where actions are normally distributed uncorrelated variables with specific means and variances.\n Means are calculated directly from the network while standard deviations are a parameter of this module\n \"\"\"\n LOG2PI = np.log(2.0 * np.pi)\n def __init__(self, input_dim: int, num_dimensions: int):",
+ "type": "code",
+ "location": "/lib/action_head.py:37-63"
+ },
+ "121": {
+ "file_id": 5,
+ "content": "This code defines an abstract base class `ActionHead` for entropy, sampling, and KL divergence calculation. It raises a NotImplementedError since subclasses should provide the actual implementation of these methods. The `DiagGaussianActionHead` class is also defined, which inherits from `ActionHead`, representing action heads with normally distributed uncorrelated variables based on network output mean and standard deviation parameters.",
+ "type": "comment"
+ },
+ "122": {
+ "file_id": 5,
+ "content": " super().__init__()\n self.input_dim = input_dim\n self.num_dimensions = num_dimensions\n self.linear_layer = nn.Linear(input_dim, num_dimensions)\n self.log_std = nn.Parameter(torch.zeros(num_dimensions), requires_grad=True)\n def reset_parameters(self):\n init.orthogonal_(self.linear_layer.weight, gain=0.01)\n init.constant_(self.linear_layer.bias, 0.0)\n def forward(self, input_data: torch.Tensor, mask=None) -> torch.Tensor:\n assert not mask, \"Can not use a mask in a gaussian action head\"\n means = self.linear_layer(input_data)\n # Unsqueeze many times to get to the same shape\n logstd = self.log_std[(None,) * (len(means.shape) - 1)]\n mean_view, logstd = torch.broadcast_tensors(means, logstd)\n return torch.stack([mean_view, logstd], dim=-1)\n def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"Log-likelihood\"\"\"\n means = pd_params[..., 0]\n log_std = pd_params[..., 1]",
+ "type": "code",
+ "location": "/lib/action_head.py:64-89"
+ },
+ "123": {
+ "file_id": 5,
+ "content": "Initializes an action head with specified input and output dimensions, sets the linear layer's weight and bias using orthogonal initialization and assigns them to None respectively.\nDefines methods to reset parameters for the action head, forward propagates data through linear layer to obtain means, and calculates log probabilities of action samples given parameters.",
+ "type": "comment"
+ },
+ "124": {
+ "file_id": 5,
+ "content": " std = torch.exp(log_std)\n z_score = (action_sample - means) / std\n return -(0.5 * ((z_score ** 2 + self.LOG2PI).sum(dim=-1)) + log_std.sum(dim=-1))\n def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution entropy calculation - sum probs * log(probs).\n In case of diagonal gaussian distribution - 1/2 log(2 pi e sigma^2)\n \"\"\"\n log_std = pd_params[..., 1]\n return (log_std + 0.5 * (self.LOG2PI + 1)).sum(dim=-1)\n def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> torch.Tensor:\n means = pd_params[..., 0]\n log_std = pd_params[..., 1]\n if deterministic:\n return means\n else:\n return torch.randn_like(means) * torch.exp(log_std) + means\n def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution KL divergence calculation\n KL(Q || P) = sum Q_i log (Q_i / P_i)\n Formula is:",
+ "type": "code",
+ "location": "/lib/action_head.py:91-119"
+ },
+ "125": {
+ "file_id": 5,
+ "content": "Line 90: Calculate standard deviation from log_std\nLine 116: Calculate z-score for action sample\nLine 117: Return negative of sum of log probabilities\n\nComment for code: This code calculates the categorical distribution entropy, sample from a diagonal Gaussian distribution, and KL divergence for two sets of parameters.",
+ "type": "comment"
+ },
+ "126": {
+ "file_id": 5,
+ "content": " log(sigma_p) - log(sigma_q) + (sigma_q^2 + (mu_q - mu_p)^2))/(2 * sigma_p^2)\n \"\"\"\n means_q = params_q[..., 0]\n log_std_q = params_q[..., 1]\n means_p = params_p[..., 0]\n log_std_p = params_p[..., 1]\n std_q = torch.exp(log_std_q)\n std_p = torch.exp(log_std_p)\n kl_div = log_std_p - log_std_q + (std_q ** 2 + (means_q - means_p) ** 2) / (2.0 * std_p ** 2) - 0.5\n return kl_div.sum(dim=-1, keepdim=True)\nclass CategoricalActionHead(ActionHead):\n \"\"\"Action head with categorical actions\"\"\"\n def __init__(\n self, input_dim: int, shape: Tuple[int], num_actions: int, builtin_linear_layer: bool = True, temperature: float = 1.0\n ):\n super().__init__()\n self.input_dim = input_dim\n self.num_actions = num_actions\n self.output_shape = shape + (num_actions,)\n self.temperature = temperature\n if builtin_linear_layer:\n self.linear_layer = nn.Linear(input_dim, np.prod(self.output_shape))\n else:",
+ "type": "code",
+ "location": "/lib/action_head.py:120-151"
+ },
+ "127": {
+ "file_id": 5,
+ "content": "This code defines an ActionHead class with categorical actions. It initializes the action head with input_dim, num_actions, shape, builtin_linear_layer (optional), and temperature parameters. If builtin_linear_layer is True, it uses a linear layer for feature extraction. The output shape is determined by the input shape and number of actions.",
+ "type": "comment"
+ },
+ "128": {
+ "file_id": 5,
+ "content": " assert (\n input_dim == num_actions\n ), f\"If input_dim ({input_dim}) != num_actions ({num_actions}), you need a linear layer to convert them.\"\n self.linear_layer = None\n def reset_parameters(self):\n if self.linear_layer is not None:\n init.orthogonal_(self.linear_layer.weight, gain=0.01)\n init.constant_(self.linear_layer.bias, 0.0)\n finit.fan_in_linear(self.linear_layer, scale=0.01)\n def forward(self, input_data: torch.Tensor, mask=None) -> Any:\n if self.linear_layer is not None:\n flat_out = self.linear_layer(input_data)\n else:\n flat_out = input_data\n shaped_out = flat_out.reshape(flat_out.shape[:-1] + self.output_shape)\n shaped_out /= self.temperature\n if mask is not None:\n shaped_out[~mask] = LOG0\n # Convert to float32 to avoid RuntimeError: \"log_softmax_lastdim_kernel_impl\" not implemented for 'Half'\n return F.log_softmax(shaped_out.float(), dim=-1)",
+ "type": "code",
+ "location": "/lib/action_head.py:152-174"
+ },
+ "129": {
+ "file_id": 5,
+ "content": "This code defines a class for an action head, which is responsible for outputting action probabilities from input data. It asserts that the input dimension matches the number of actions, and if a linear layer is not None, it initializes its parameters orthogonally with gain 0.01 and sets the bias to 0. The forward function computes the output by either passing the input through a linear layer or using the input directly, reshapes the result based on the output shape, scales the result by temperature, applies a mask if provided, and then returns the log softmax of the shaped output as float32.",
+ "type": "comment"
+ },
+ "130": {
+ "file_id": 5,
+ "content": " def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:\n value = actions.long().unsqueeze(-1)\n value, log_pmf = torch.broadcast_tensors(value, logits)\n value = value[..., :1]\n result = log_pmf.gather(-1, value).squeeze(-1)\n # result is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it.\n for _ in self.output_shape[:-1]:\n result = result.sum(dim=-1)\n return result\n def entropy(self, logits: torch.Tensor) -> torch.Tensor:\n \"\"\"Categorical distribution entropy calculation - sum probs * log(probs)\"\"\"\n probs = torch.exp(logits)\n entropy = -torch.sum(probs * logits, dim=-1)\n # entropy is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it.\n for _ in self.output_shape[:-1]:\n entropy = entropy.sum(dim=-1)\n return entropy\n def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:\n if deterministic:",
+ "type": "code",
+ "location": "/lib/action_head.py:176-196"
+ },
+ "131": {
+ "file_id": 5,
+ "content": "The code contains three functions: `logprob`, `entropy`, and `sample`. \n- The `logprob` function calculates the log probability of a given set of actions against the provided logits. It returns the result in torch format.\n- The `entropy` function calculates the entropy of a categorical distribution from the given logits. It also returns the entropy in torch format.\n- The `sample` function generates a sample from the distribution represented by the given logits. If `deterministic` is set to True, it will always return the same value.",
+ "type": "comment"
+ },
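+ A minimal standalone sketch (assumed toy shapes, not part of the cached file) of the gather-based logprob and the entropy computed above:
+ import torch
+ import torch.nn.functional as F
+ logits = F.log_softmax(torch.randn(2, 5), dim=-1)               # toy log-probabilities (batch, num_actions)
+ actions = torch.tensor([1, 4])
+ logprob = logits.gather(-1, actions.unsqueeze(-1)).squeeze(-1)  # log p(action) per batch element
+ entropy = -(logits.exp() * logits).sum(dim=-1)                  # -sum p log p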
+ "132": {
+ "file_id": 5,
+ "content": " return torch.argmax(logits, dim=-1)\n else:\n # Gumbel-Softmax trick.\n u = torch.rand_like(logits)\n # In float16, if you have around 2^{float_mantissa_bits} logits, sometimes you'll sample 1.0\n # Then the log(-log(1.0)) will give -inf when it should give +inf\n # This is a silly hack to get around that.\n # This hack does not skew the probability distribution, because this event can't possibly win the argmax.\n u[u == 1.0] = 0.999\n return torch.argmax(logits - torch.log(-torch.log(u)), dim=-1)\n def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution KL divergence calculation\n KL(Q || P) = sum Q_i log (Q_i / P_i)\n When talking about logits this is:\n sum exp(Q_i) * (Q_i - P_i)\n \"\"\"\n kl = (torch.exp(logits_q) * (logits_q - logits_p)).sum(-1, keepdim=True)\n # kl is per-entry, still of size self.output_shape; we need to reduce of the rest of it.",
+ "type": "code",
+ "location": "/lib/action_head.py:197-217"
+ },
+ "133": {
+ "file_id": 5,
+ "content": "Returns the index with maximum value in logits\"\n\"Applies Gumbel-Softmax trick for training with float16 precision\"\n\"Calculates KL divergence between two categorical distributions using logits",
+ "type": "comment"
+ },
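+ A minimal standalone sketch (assumed toy logits) of the Gumbel-max sampling and logit-space KL used above:
+ import torch
+ import torch.nn.functional as F
+ logits_q = F.log_softmax(torch.randn(4, 8), dim=-1)
+ logits_p = F.log_softmax(torch.randn(4, 8), dim=-1)
+ u = torch.rand_like(logits_q)
+ u[u == 1.0] = 0.999                                                   # same float16 guard as the cached code
+ sample = torch.argmax(logits_q - torch.log(-torch.log(u)), dim=-1)    # Gumbel-max sample
+ kl_qp = (logits_q.exp() * (logits_q - logits_p)).sum(dim=-1)          # KL(Q || P) from log-probs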
+ "134": {
+ "file_id": 5,
+ "content": " for _ in self.output_shape[:-1]:\n kl = kl.sum(dim=-2) # dim=-2 because we use keepdim=True above.\n return kl\nclass DictActionHead(nn.ModuleDict):\n \"\"\"Action head with multiple sub-actions\"\"\"\n def reset_parameters(self):\n for subhead in self.values():\n subhead.reset_parameters()\n def forward(self, input_data: torch.Tensor, **kwargs) -> Any:\n \"\"\"\n :param kwargs: each kwarg should be a dict with keys corresponding to self.keys()\n e.g. if this ModuleDict has submodules keyed by 'A', 'B', and 'C', we could call:\n forward(input_data, foo={'A': True, 'C': False}, bar={'A': 7}}\n Then children will be called with:\n A: forward(input_data, foo=True, bar=7)\n B: forward(input_data)\n C: forward(input_Data, foo=False)\n \"\"\"\n result = {}\n for head_name, subhead in self.items():\n head_kwargs = {\n kwarg_name: kwarg[head_name]",
+ "type": "code",
+ "location": "/lib/action_head.py:218-243"
+ },
+ "135": {
+ "file_id": 5,
+ "content": "This code defines a DictActionHead class, which is an action head with multiple sub-actions. The class has methods to reset parameters and perform forward pass. During the forward pass, it takes input data and optional keyword arguments, and calls the forward method on each of its submodules using the provided keyword arguments. The results from all submodules are stored in a dictionary.",
+ "type": "comment"
+ },
+ "136": {
+ "file_id": 5,
+ "content": " for kwarg_name, kwarg in kwargs.items()\n if kwarg is not None and head_name in kwarg\n }\n result[head_name] = subhead(input_data, **head_kwargs)\n return result\n def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:\n return sum(subhead.logprob(actions[k], logits[k]) for k, subhead in self.items())\n def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:\n return {k: subhead.sample(logits[k], deterministic) for k, subhead in self.items()}\n def entropy(self, logits: torch.Tensor) -> torch.Tensor:\n return sum(subhead.entropy(logits[k]) for k, subhead in self.items())\n def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:\n return sum(subhead.kl_divergence(logits_q[k], logits_p[k]) for k, subhead in self.items())\ndef make_action_head(ac_space: ValType, pi_out_size: int, temperature: float = 1.0):\n \"\"\"Helper function to create an action head corresponding to the environment action space\"\"\"",
+ "type": "code",
+ "location": "/lib/action_head.py:244-264"
+ },
+ "137": {
+ "file_id": 5,
+ "content": "The code defines an action head class that contains sub-heads corresponding to the environment's action space. It supports logprob, sample, entropy, and kl_divergence methods on a batch of actions and logits. The make_action_head function creates an action head based on the given action space and output size of the policy network.",
+ "type": "comment"
+ },
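+ A hypothetical usage sketch of make_action_head, assuming gym3 and lib.action_head import cleanly; the sizes are made up for illustration:
+ from gym3.types import DictType, Discrete, TensorType
+ from lib.action_head import make_action_head
+ ac_space = DictType(
+     buttons=TensorType(shape=(1,), eltype=Discrete(256)),   # made-up joint button count
+     camera=TensorType(shape=(1,), eltype=Discrete(121)),    # made-up joint camera count
+ )
+ head = make_action_head(ac_space, pi_out_size=1024)         # DictActionHead of CategoricalActionHeads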
+ "138": {
+ "file_id": 5,
+ "content": " if isinstance(ac_space, TensorType):\n if isinstance(ac_space.eltype, Discrete):\n return CategoricalActionHead(pi_out_size, ac_space.shape, ac_space.eltype.n, temperature=temperature)\n elif isinstance(ac_space.eltype, Real):\n if temperature != 1.0:\n logging.warning(\"Non-1 temperature not implemented for DiagGaussianActionHead.\")\n assert len(ac_space.shape) == 1, \"Nontrivial shapes not yet implemented.\"\n return DiagGaussianActionHead(pi_out_size, ac_space.shape[0])\n elif isinstance(ac_space, DictType):\n return DictActionHead({k: make_action_head(v, pi_out_size, temperature) for k, v in ac_space.items()})\n raise NotImplementedError(f\"Action space of type {type(ac_space)} is not supported\")",
+ "type": "code",
+ "location": "/lib/action_head.py:265-275"
+ },
+ "139": {
+ "file_id": 5,
+ "content": "Checks the type of action space and returns a corresponding ActionHead object. Supports Discrete, Real, and DictType action spaces. Non-1 temperature and nontrivial shape actions are not implemented yet.",
+ "type": "comment"
+ },
+ "140": {
+ "file_id": 6,
+ "content": "/lib/action_mapping.py",
+ "type": "filepath"
+ },
+ "141": {
+ "file_id": 6,
+ "content": "This code organizes player inputs in a video game using action mappings, manages camera actions, handles assertion checks and conversions for different action spaces like buttons, cameras, inventory keys, and factored action space mapping.",
+ "type": "summary"
+ },
+ "142": {
+ "file_id": 6,
+ "content": "import abc\nimport itertools\nfrom collections import OrderedDict\nfrom typing import Dict, List\nimport numpy as np\nfrom gym3.types import DictType, Discrete, TensorType\nfrom lib.actions import Buttons\nclass ActionMapping(abc.ABC):\n \"\"\"Class that maps between the standard MC factored action space and a new one you define!\n :param n_camera_bins: Need to specify this to define the original ac space for stats code\n \"\"\"\n # This is the default buttons groups, it can be changed for your action space\n BUTTONS_GROUPS = OrderedDict(\n hotbar=[\"none\"] + [f\"hotbar.{i}\" for i in range(1, 10)],\n fore_back=[\"none\", \"forward\", \"back\"],\n left_right=[\"none\", \"left\", \"right\"],\n sprint_sneak=[\"none\", \"sprint\", \"sneak\"],\n use=[\"none\", \"use\"],\n drop=[\"none\", \"drop\"],\n attack=[\"none\", \"attack\"],\n jump=[\"none\", \"jump\"],\n )\n def __init__(self, n_camera_bins: int = 11):\n assert n_camera_bins % 2 == 1, \"n_camera_bins should be odd\"\n self.n_camera_bins = n_camera_bins",
+ "type": "code",
+ "location": "/lib/action_mapping.py:1-32"
+ },
+ "143": {
+ "file_id": 6,
+ "content": "This code defines a class \"ActionMapping\" that maps between the standard Minecraft action space and a new one defined by the user. It uses ordered dictionaries to represent different action groups such as buttons, and requires an odd number of camera bins for initialization.",
+ "type": "comment"
+ },
+ "144": {
+ "file_id": 6,
+ "content": " self.camera_null_bin = n_camera_bins // 2\n self.stats_ac_space = DictType(\n **{\n \"buttons\": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),\n \"camera\": TensorType(shape=(2,), eltype=Discrete(n_camera_bins)),\n }\n )\n @abc.abstractmethod\n def from_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts a factored action (ac) to the new space\n :param ac: Dictionary of actions that must have a batch dimension\n \"\"\"\n pass\n @abc.abstractmethod\n def to_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts an action in the new space (ac) to the factored action space.\n :param ac: Dictionary of actions that must have a batch dimension\n \"\"\"\n pass\n @abc.abstractmethod\n def get_action_space_update(self):\n \"\"\"Return a magym (gym3) action space. This will be used to update the env action space.\"\"\"\n pass\n @abc.abstractmethod\n def get_zero_action(self):\n \"\"\"Return the zero or null action for this action space\"\"\"",
+ "type": "code",
+ "location": "/lib/action_mapping.py:33-64"
+ },
+ "145": {
+ "file_id": 6,
+ "content": "This code defines an abstract base class for mapping actions to a new space. It includes methods for converting factored actions to the new space, converting actions in the new space back to the factored action space, returning a gym action space for updating the environment, and returning the null or zero action for this action space.",
+ "type": "comment"
+ },
+ "146": {
+ "file_id": 6,
+ "content": " pass\n def factored_buttons_to_groups(self, ac_buttons: np.ndarray, button_group: List[str]) -> List[str]:\n \"\"\"For a mutually exclusive group of buttons in button_group, find which option\n in the group was chosen. Assumes that each button group has the option of 'none'\n meaning that no button in the group was pressed.\n :param ac_buttons: button actions from the factored action space. Should dims [B, len(Buttons.ALL)]\n :param button_group: List of buttons in a mutually exclusive group. Each item in the\n list should appear in Buttons.ALL except for the special case 'none' which means\n no button in the group was pressed. e.g. ['none', 'forward', 'back']. For now\n 'none' must be the first element of button_group\n Returns a list of length B, where each element is an item from button_group.\n \"\"\"\n assert ac_buttons.shape[1] == len(\n Buttons.ALL\n ), f\"There should be {len(Buttons.ALL)} buttons in the factored buttons space\"",
+ "type": "code",
+ "location": "/lib/action_mapping.py:65-82"
+ },
+ "147": {
+ "file_id": 6,
+ "content": "This function takes in button actions from a factored action space and a list of mutually exclusive buttons. It returns a list indicating which button (or none if no button was pressed) was chosen for each item in the input array, given that each group has the option of 'none'. The function checks if the shape of the input matches the expected number of buttons.",
+ "type": "comment"
+ },
+ "148": {
+ "file_id": 6,
+ "content": " assert button_group[0] == \"none\", \"This function only works if 'none' is in button_group\"\n # Actions in ac_buttons with order according to button_group\n group_indices = [Buttons.ALL.index(b) for b in button_group if b != \"none\"]\n ac_choices = ac_buttons[:, group_indices]\n # Special cases for forward/back, left/right where mutual press means do neither\n if \"forward\" in button_group and \"back\" in button_group:\n ac_choices[np.all(ac_choices, axis=-1)] = 0\n if \"left\" in button_group and \"right\" in button_group:\n ac_choices[np.all(ac_choices, axis=-1)] = 0\n ac_non_zero = np.where(ac_choices)\n ac_choice = [\"none\" for _ in range(ac_buttons.shape[0])]\n # Iterate over the non-zero indices so that if two buttons in a group were pressed at the same time\n # we give priority to the button later in the group. E.g. if hotbar.1 and hotbar.2 are pressed during the same\n # timestep, hotbar.2 is marked as pressed",
+ "type": "code",
+ "location": "/lib/action_mapping.py:83-97"
+ },
+ "149": {
+ "file_id": 6,
+ "content": "Ensures function works only when 'none' is in button_group. Maps non-zero action button indices to corresponding actions, handling special cases of mutual press for forward/back and left/right. Prioritizes later buttons in group if pressed at the same time.",
+ "type": "comment"
+ },
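+ A toy re-implementation (not the library call itself) of the group-selection logic described above, with a made-up stand-in for Buttons.ALL:
+ import numpy as np
+ buttons_all = ["attack", "back", "forward", "jump"]          # stand-in for Buttons.ALL
+ group = ["none", "forward", "back"]
+ ac_buttons = np.array([[0, 0, 1, 0],                         # forward pressed
+                        [0, 1, 1, 0],                         # forward+back cancel out
+                        [0, 0, 0, 1]])                        # nothing in this group pressed
+ idx = [buttons_all.index(b) for b in group if b != "none"]
+ choices = ac_buttons[:, idx]
+ choices[np.all(choices, axis=-1)] = 0                        # mutual press means do neither
+ result = ["none"] * len(ac_buttons)
+ for row, col in zip(*np.where(choices)):
+     result[row] = group[col + 1]                             # later buttons win ties
+ print(result)                                                # ['forward', 'none', 'none']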
+ "150": {
+ "file_id": 6,
+ "content": " for index, action in zip(ac_non_zero[0], ac_non_zero[1]):\n ac_choice[index] = button_group[action + 1] # the zero'th index will mean no button pressed\n return ac_choice\nclass IDMActionMapping(ActionMapping):\n \"\"\"For IDM, but essentially this is just an identity mapping\"\"\"\n def from_factored(self, ac: Dict) -> Dict:\n return ac\n def to_factored(self, ac: Dict) -> Dict:\n return ac\n def get_action_space_update(self):\n \"\"\"Return a magym (gym3) action space. This will be used to update the env action space.\"\"\"\n return {\n \"buttons\": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),\n \"camera\": TensorType(shape=(2,), eltype=Discrete(self.n_camera_bins)),\n }\n def get_zero_action(self):\n raise NotImplementedError()\nclass CameraHierarchicalMapping(ActionMapping):\n \"\"\"Buttons are joint as in ButtonsJointMapping, but now a camera on/off meta action is added into this joint space.\n When this meta action is triggered, the separate camera head chooses a camera action which is also now a joint space.",
+ "type": "code",
+ "location": "/lib/action_mapping.py:98-122"
+ },
+ "151": {
+ "file_id": 6,
+ "content": "This code defines two classes, IDMActionMapping and CameraHierarchicalMapping, which are action mappings used in a video game. The classes define methods to convert actions between factored and non-factored representations, get an action space update, and handle zero actions. These classes seem to be part of a larger system for mapping player inputs to actions in the game environment.",
+ "type": "comment"
+ },
+ "152": {
+ "file_id": 6,
+ "content": " :param n_camera_bins: number of camera bins in the factored space\n \"\"\"\n # Add camera meta action to BUTTONS_GROUPS\n BUTTONS_GROUPS = ActionMapping.BUTTONS_GROUPS.copy()\n BUTTONS_GROUPS[\"camera\"] = [\"none\", \"camera\"]\n BUTTONS_COMBINATIONS = list(itertools.product(*BUTTONS_GROUPS.values())) + [\"inventory\"]\n BUTTONS_COMBINATION_TO_IDX = {comb: i for i, comb in enumerate(BUTTONS_COMBINATIONS)}\n BUTTONS_IDX_TO_COMBINATION = {i: comb for i, comb in enumerate(BUTTONS_COMBINATIONS)}\n def __init__(self, *args, **kwargs):\n super().__init__(*args, **kwargs)\n self.camera_groups = OrderedDict(\n camera_x=[f\"camera_x{i}\" for i in range(self.n_camera_bins)],\n camera_y=[f\"camera_y{i}\" for i in range(self.n_camera_bins)],\n )\n self.camera_combinations = list(itertools.product(*self.camera_groups.values()))\n self.camera_combination_to_idx = {comb: i for i, comb in enumerate(self.camera_combinations)}\n self.camera_idx_to_combination = {i: comb for i, comb in enumerate(self.camera_combinations)}",
+ "type": "code",
+ "location": "/lib/action_mapping.py:124-142"
+ },
+ "153": {
+ "file_id": 6,
+ "content": "This code adds camera meta actions to the BUTTONS_GROUPS and defines functions for mapping between button combinations, indices, and names.",
+ "type": "comment"
+ },
+ "154": {
+ "file_id": 6,
+ "content": " self.camera_null_idx = self.camera_combination_to_idx[\n (f\"camera_x{self.camera_null_bin}\", f\"camera_y{self.camera_null_bin}\")\n ]\n self._null_action = {\n \"buttons\": self.BUTTONS_COMBINATION_TO_IDX[tuple(\"none\" for _ in range(len(self.BUTTONS_GROUPS)))]\n }\n self._precompute_to_factored()\n def _precompute_to_factored(self):\n \"\"\"Precompute the joint action -> factored action matrix.\"\"\"\n button_dim = self.stats_ac_space[\"buttons\"].size\n self.BUTTON_IDX_TO_FACTORED = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION), button_dim), dtype=int)\n self.BUTTON_IDX_TO_CAMERA_META_OFF = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION)), dtype=bool)\n self.CAMERA_IDX_TO_FACTORED = np.zeros((len(self.camera_idx_to_combination), 2), dtype=int)\n # Pre compute Buttons\n for jnt_ac, button_comb in self.BUTTONS_IDX_TO_COMBINATION.items():\n new_button_ac = np.zeros(len(Buttons.ALL), dtype=\"i\")\n if button_comb == \"inventory\":",
+ "type": "code",
+ "location": "/lib/action_mapping.py:143-161"
+ },
+ "155": {
+ "file_id": 6,
+ "content": "Code chunk sets up arrays for button and camera action mappings.\nThe code defines button and camera indices, initializes arrays to store the factored actions for each joint action, and begins processing the button combinations.",
+ "type": "comment"
+ },
+ "156": {
+ "file_id": 6,
+ "content": " new_button_ac[Buttons.ALL.index(\"inventory\")] = 1\n else:\n for group_choice in button_comb[:-1]: # Last one is camera\n if group_choice != \"none\":\n new_button_ac[Buttons.ALL.index(group_choice)] = 1\n if button_comb[-1] != \"camera\": # This means camera meta action is off\n self.BUTTON_IDX_TO_CAMERA_META_OFF[jnt_ac] = True\n self.BUTTON_IDX_TO_FACTORED[jnt_ac] = new_button_ac\n # Pre compute camera\n for jnt_ac, camera_comb in self.camera_idx_to_combination.items():\n new_camera_ac = np.ones((2), dtype=\"i\") * self.camera_null_bin\n new_camera_ac[0] = self.camera_groups[\"camera_x\"].index(camera_comb[0])\n new_camera_ac[1] = self.camera_groups[\"camera_y\"].index(camera_comb[1])\n self.CAMERA_IDX_TO_FACTORED[jnt_ac] = new_camera_ac\n def from_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts a factored action (ac) to the new space. Assumes ac has a batch dim\"\"\"",
+ "type": "code",
+ "location": "/lib/action_mapping.py:162-180"
+ },
+ "157": {
+ "file_id": 6,
+ "content": "Code is creating factored representations of action and camera combinations for each joint. It initializes new_button_ac to 1 for all inventory buttons, then checks if any other groups are selected and assigns those indices to 1 in new_button_ac. If the last combination is not \"camera\", it sets the camera_meta_off flag. Then it creates new_camera_ac with indices based on the camera group combinations and stores these factored representations for both action and camera in their respective dictionaries.",
+ "type": "comment"
+ },
+ "158": {
+ "file_id": 6,
+ "content": " assert ac[\"camera\"].ndim == 2, f\"bad camera label, {ac['camera']}\"\n assert ac[\"buttons\"].ndim == 2, f\"bad buttons label, {ac['buttons']}\"\n # Get button choices for everything but camera\n choices_by_group = OrderedDict(\n (k, self.factored_buttons_to_groups(ac[\"buttons\"], v)) for k, v in self.BUTTONS_GROUPS.items() if k != \"camera\"\n )\n # Set camera \"on off\" action based on whether non-null camera action was given\n camera_is_null = np.all(ac[\"camera\"] == self.camera_null_bin, axis=1)\n choices_by_group[\"camera\"] = [\"none\" if is_null else \"camera\" for is_null in camera_is_null]\n new_button_ac = []\n new_camera_ac = []\n for i in range(ac[\"buttons\"].shape[0]):\n # Buttons\n key = tuple([v[i] for v in choices_by_group.values()])\n if ac[\"buttons\"][i, Buttons.ALL.index(\"inventory\")] == 1:\n key = \"inventory\"\n new_button_ac.append(self.BUTTONS_COMBINATION_TO_IDX[key])\n # Camera -- inventory is also exclusive with camera",
+ "type": "code",
+ "location": "/lib/action_mapping.py:181-200"
+ },
+ "159": {
+ "file_id": 6,
+ "content": "This code is performing an assertion check to ensure that the \"camera\" and \"buttons\" labels have the correct dimensions. It then creates a dictionary of button choices for each group except camera, sets the camera action based on whether a non-null camera action was given, and finally generates new arrays of button and camera actions based on the choices.",
+ "type": "comment"
+ },
+ "160": {
+ "file_id": 6,
+ "content": " if key == \"inventory\":\n key = (\n f\"camera_x{self.camera_null_bin}\",\n f\"camera_y{self.camera_null_bin}\",\n )\n else:\n key = (f\"camera_x{ac['camera'][i][0]}\", f\"camera_y{ac['camera'][i][1]}\")\n new_camera_ac.append(self.camera_combination_to_idx[key])\n return dict(\n buttons=np.array(new_button_ac)[:, None],\n camera=np.array(new_camera_ac)[:, None],\n )\n def to_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts an action in the new space (ac) to the factored action space. Assumes ac has a batch dim\"\"\"\n assert ac[\"camera\"].shape[-1] == 1\n assert ac[\"buttons\"].shape[-1] == 1\n new_button_ac = self.BUTTON_IDX_TO_FACTORED[np.squeeze(ac[\"buttons\"], -1)]\n camera_off = self.BUTTON_IDX_TO_CAMERA_META_OFF[np.squeeze(ac[\"buttons\"], -1)]\n new_camera_ac = self.CAMERA_IDX_TO_FACTORED[np.squeeze(ac[\"camera\"], -1)]\n new_camera_ac[camera_off] = self.camera_null_bin",
+ "type": "code",
+ "location": "/lib/action_mapping.py:201-223"
+ },
+ "161": {
+ "file_id": 6,
+ "content": "This code is converting an action in the new space (ac) to the factored action space. It first checks if the \"inventory\" key is present, and if so, uses a specific key format. For other keys, it uses a different key format. Then it appends the camera indices to a list. The function returns a dictionary with buttons and cameras in the new action space. If the input action has a batch dimension, the code asserts that the shape of both \"camera\" and \"buttons\" are 1, squeezes them, maps the button indices to factored action space, calculates camera offsets, maps the camera indices to factored action space, and replaces the null camera values with \"camera_null_bin\".",
+ "type": "comment"
+ },
+ "162": {
+ "file_id": 6,
+ "content": " return dict(buttons=new_button_ac, camera=new_camera_ac)\n def get_action_space_update(self):\n return {\n \"camera\": TensorType(shape=(1,), eltype=Discrete(len(self.camera_combinations))),\n \"buttons\": TensorType(shape=(1,), eltype=Discrete(len(self.BUTTONS_COMBINATIONS))),\n }\n def get_zero_action(self):\n return self._null_action",
+ "type": "code",
+ "location": "/lib/action_mapping.py:225-234"
+ },
+ "163": {
+ "file_id": 6,
+ "content": "This code defines a class with three methods. The first method returns a dictionary containing the \"buttons\" and \"camera\" actions. The second method specifies the action space update, defining the shape and type for both \"camera\" and \"buttons\". The third method returns a null action.",
+ "type": "comment"
+ },
+ "164": {
+ "file_id": 7,
+ "content": "/lib/actions.py",
+ "type": "filepath"
+ },
+ "165": {
+ "file_id": 7,
+ "content": "The code includes Minecraft action classes with quantization schemes and an ActionTransformer, along with three functions for mapping item IDs to names and converting environment data to policy format.",
+ "type": "summary"
+ },
+ "166": {
+ "file_id": 7,
+ "content": "import attr\nimport minerl.herobraine.hero.mc as mc\nimport numpy as np\nfrom lib.minecraft_util import store_args\nclass Buttons:\n ATTACK = \"attack\"\n BACK = \"back\"\n FORWARD = \"forward\"\n JUMP = \"jump\"\n LEFT = \"left\"\n RIGHT = \"right\"\n SNEAK = \"sneak\"\n SPRINT = \"sprint\"\n USE = \"use\"\n DROP = \"drop\"\n INVENTORY = \"inventory\"\n ALL = [\n ATTACK,\n BACK,\n FORWARD,\n JUMP,\n LEFT,\n RIGHT,\n SNEAK,\n SPRINT,\n USE,\n DROP,\n INVENTORY,\n ] + [f\"hotbar.{i}\" for i in range(1, 10)]\nclass SyntheticButtons:\n # Composite / scripted actions\n CHANNEL_ATTACK = \"channel-attack\"\n ALL = [CHANNEL_ATTACK]\nclass QuantizationScheme:\n LINEAR = \"linear\"\n MU_LAW = \"mu_law\"\n@attr.s(auto_attribs=True)\nclass CameraQuantizer:\n \"\"\"\n A camera quantizer that discretizes and undiscretizes a continuous camera input with y (pitch) and x (yaw) components.\n Parameters:\n - camera_binsize: The size of the bins used for quantization. In case of mu-law quantization, it corresponds to the average binsize.",
+ "type": "code",
+ "location": "/lib/actions.py:1-54"
+ },
+ "167": {
+ "file_id": 7,
+ "content": "This code defines classes for various action types and a camera quantizer in the context of Minecraft gameplay. The Buttons class represents different action buttons like attack, jump, inventory, etc. SyntheticButtons includes composite/scripted actions. QuantizationScheme has options for linear or mu-law quantization. CameraQuantizer is responsible for discretizing and undiscretizing continuous camera input (pitch and yaw).",
+ "type": "comment"
+ },
+ "168": {
+ "file_id": 7,
+ "content": " - camera_maxval: The maximum value of the camera action.\n - quantization_scheme: The quantization scheme to use. Currently, two quantization schemes are supported:\n - Linear quantization (default): Camera actions are split uniformly into discrete bins\n - Mu-law quantization: Transforms the camera action using mu-law encoding (https://en.wikipedia.org/wiki/%CE%9C-law_algorithm)\n followed by the same quantization scheme used by the linear scheme.\n - mu: Mu is the parameter that defines the curvature of the mu-law encoding. Higher values of\n mu will result in a sharper transition near zero. Below are some reference values listed\n for choosing mu given a constant maxval and a desired max_precision value.\n maxval = 10 | max_precision = 0.5 | μ ≈ 2.93826\n maxval = 10 | max_precision = 0.4 | μ ≈ 4.80939\n maxval = 10 | max_precision = 0.25 | μ ≈ 11.4887\n maxval = 20 | max_precision = 0.5 | μ ≈ 2.7\n maxval = 20 | max_precision = 0.4 | μ ≈ 4.39768\n maxval = 20 | max_precision = 0.25 | μ ≈ 10.3194",
+ "type": "code",
+ "location": "/lib/actions.py:55-68"
+ },
+ "169": {
+ "file_id": 7,
+ "content": "This code defines two quantization schemes for camera actions: linear and mu-law. It also provides reference values for the mu parameter based on maxval and desired max_precision for mu-law encoding.",
+ "type": "comment"
+ },
+ "170": {
+ "file_id": 7,
+ "content": " maxval = 40 | max_precision = 0.5 | μ ≈ 2.60780\n maxval = 40 | max_precision = 0.4 | μ ≈ 4.21554\n maxval = 40 | max_precision = 0.25 | μ ≈ 9.81152\n \"\"\"\n camera_maxval: int\n camera_binsize: int\n quantization_scheme: str = attr.ib(\n default=QuantizationScheme.LINEAR,\n validator=attr.validators.in_([QuantizationScheme.LINEAR, QuantizationScheme.MU_LAW]),\n )\n mu: float = attr.ib(default=5)\n def discretize(self, xy):\n xy = np.clip(xy, -self.camera_maxval, self.camera_maxval)\n if self.quantization_scheme == QuantizationScheme.MU_LAW:\n xy = xy / self.camera_maxval\n v_encode = np.sign(xy) * (np.log(1.0 + self.mu * np.abs(xy)) / np.log(1.0 + self.mu))\n v_encode *= self.camera_maxval\n xy = v_encode\n # Quantize using linear scheme\n return np.round((xy + self.camera_maxval) / self.camera_binsize).astype(np.int64)\n def undiscretize(self, xy):\n xy = xy * self.camera_binsize - self.camera_maxval",
+ "type": "code",
+ "location": "/lib/actions.py:69-95"
+ },
+ "171": {
+ "file_id": 7,
+ "content": "This code defines a class with properties for camera max value, bin size, quantization scheme (linear or Mu-Law), and mu value. The discretize method takes in xy coordinates, clips them within the camera range, applies the specified quantization scheme to discretize the values, and returns the rounded values as integers. The undiscretize method takes in discretized values and converts them back to their original continuous representation by multiplying with the bin size and subtracting the camera max value.",
+ "type": "comment"
+ },
+ "172": {
+ "file_id": 7,
+ "content": " if self.quantization_scheme == QuantizationScheme.MU_LAW:\n xy = xy / self.camera_maxval\n v_decode = np.sign(xy) * (1.0 / self.mu) * ((1.0 + self.mu) ** np.abs(xy) - 1.0)\n v_decode *= self.camera_maxval\n xy = v_decode\n return xy\nclass ActionTransformer:\n \"\"\"Transforms actions between internal array and minerl env format.\"\"\"\n @store_args\n def __init__(\n self,\n camera_maxval=10,\n camera_binsize=2,\n camera_quantization_scheme=\"linear\",\n camera_mu=5,\n ):\n self.quantizer = CameraQuantizer(\n camera_maxval=camera_maxval,\n camera_binsize=camera_binsize,\n quantization_scheme=camera_quantization_scheme,\n mu=camera_mu,\n )\n def camera_zero_bin(self):\n return self.camera_maxval // self.camera_binsize\n def discretize_camera(self, xy):\n return self.quantizer.discretize(xy)\n def undiscretize_camera(self, pq):\n return self.quantizer.undiscretize(pq)",
+ "type": "code",
+ "location": "/lib/actions.py:97-130"
+ },
+ "173": {
+ "file_id": 7,
+ "content": "This code defines a class called `ActionTransformer` that transforms actions between internal arrays and the MinerL environment format. It includes methods for discretizing and undiscretizing camera data, as well as calculating a zero bin value based on camera binsize. If the quantization scheme is set to \"mu_law\", it applies the mu-law quantization method to the input data.",
+ "type": "comment"
+ },
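+ A standalone numpy sketch (assumed toy values) of the mu-law discretize/undiscretize round trip described above:
+ import numpy as np
+ maxval, binsize, mu = 10.0, 2.0, 5.0
+ xy = np.clip(np.array([-9.3, -0.4, 0.0, 3.7]), -maxval, maxval)
+ enc = np.sign(xy) * np.log1p(mu * np.abs(xy) / maxval) / np.log1p(mu) * maxval            # mu-law encode
+ bins = np.round((enc + maxval) / binsize).astype(np.int64)                                # linear binning
+ dec = bins * binsize - maxval                                                             # undo binning
+ rec = np.sign(dec) * (1.0 / mu) * ((1.0 + mu) ** (np.abs(dec) / maxval) - 1.0) * maxval   # mu-law decode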
+ "174": {
+ "file_id": 7,
+ "content": " def item_embed_id_to_name(self, item_id):\n return mc.MINERL_ITEM_MAP[item_id]\n def dict_to_numpy(self, acs):\n \"\"\"\n Env format to policy output format.\n \"\"\"\n act = {\n \"buttons\": np.stack([acs.get(k, 0) for k in Buttons.ALL], axis=-1),\n \"camera\": self.discretize_camera(acs[\"camera\"]),\n }\n if not self.human_spaces:\n act.update(\n {\n \"synthetic_buttons\": np.stack([acs[k] for k in SyntheticButtons.ALL], axis=-1),\n \"place\": self.item_embed_name_to_id(acs[\"place\"]),\n \"equip\": self.item_embed_name_to_id(acs[\"equip\"]),\n \"craft\": self.item_embed_name_to_id(acs[\"craft\"]),\n }\n )\n return act\n def numpy_to_dict(self, acs):\n \"\"\"\n Numpy policy output to env-compatible format.\n \"\"\"\n assert acs[\"buttons\"].shape[-1] == len(\n Buttons.ALL\n ), f\"Mismatched actions: {acs}; expected {len(Buttons.ALL)}:\\n( {Buttons.ALL})\"",
+ "type": "code",
+ "location": "/lib/actions.py:132-160"
+ },
+ "175": {
+ "file_id": 7,
+ "content": "The code contains three functions:\n\n1. item_embed_id_to_name(): This function converts an item ID to its name using the mc.MINERL_ITEM_MAP dictionary.\n2. dict_to_numpy(): This function transforms environment format data to policy output format, creating a dictionary \"act\" containing buttons and camera values in numpy array format. If human-spaces is False, it adds synthetic_buttons, place, equip, and craft values as well.\n3. numpy_to_dict(): This function converts numpy policy output to an environment-compatible format, ensuring the buttons shape matches the expected size.",
+ "type": "comment"
+ },
+ "176": {
+ "file_id": 7,
+ "content": " out = {name: acs[\"buttons\"][..., i] for (i, name) in enumerate(Buttons.ALL)}\n out[\"camera\"] = self.undiscretize_camera(acs[\"camera\"])\n return out\n def policy2env(self, acs):\n acs = self.numpy_to_dict(acs)\n return acs\n def env2policy(self, acs):\n nbatch = acs[\"camera\"].shape[0]\n dummy = np.zeros((nbatch,))\n out = {\n \"camera\": self.discretize_camera(acs[\"camera\"]),\n \"buttons\": np.stack([acs.get(k, dummy) for k in Buttons.ALL], axis=-1),\n }\n return out",
+ "type": "code",
+ "location": "/lib/actions.py:161-178"
+ },
+ "177": {
+ "file_id": 7,
+ "content": "The code defines three methods: \"undiscretize_camera\", \"numpy_to_dict\", and \"discretize_camera\". It converts a camera array to its undiscretized form, converts numpy arrays to dictionaries, and converts an undiscretized camera array back into discretized form, respectively.",
+ "type": "comment"
+ },
+ "178": {
+ "file_id": 8,
+ "content": "/lib/impala_cnn.py",
+ "type": "filepath"
+ },
+ "179": {
+ "file_id": 8,
+ "content": "The ImpalaCNN architecture is created with optional group normalization, allowing for customizable input shape, downsample stacks, output hidden size, and residual blocks per stack. It inherits from a base class and utilizes 2D convolutional layers for multi-stack classification models.",
+ "type": "summary"
+ },
+ "180": {
+ "file_id": 8,
+ "content": "import math\nfrom copy import deepcopy\nfrom typing import Dict, List, Optional\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib import misc\nfrom lib import torch_util as tu\nfrom lib.util import FanInInitReLULayer\nclass CnnBasicBlock(nn.Module):\n \"\"\"\n Residual basic block, as in ImpalaCNN. Preserves channel number and shape\n :param inchan: number of input channels\n :param init_scale: weight init scale multiplier\n \"\"\"\n def __init__(\n self,\n inchan: int,\n init_scale: float = 1,\n log_scope=\"\",\n init_norm_kwargs: Dict = {},\n **kwargs,\n ):\n super().__init__()\n self.inchan = inchan\n s = math.sqrt(init_scale)\n self.conv0 = FanInInitReLULayer(\n self.inchan,\n self.inchan,\n kernel_size=3,\n padding=1,\n init_scale=s,\n log_scope=f\"{log_scope}/conv0\",\n **init_norm_kwargs,\n )\n self.conv1 = FanInInitReLULayer(\n self.inchan,\n self.inchan,",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:1-42"
+ },
+ "181": {
+ "file_id": 8,
+ "content": "This code defines a CnnBasicBlock class for ImpalaCNN, which is a residual basic block that preserves the number of input channels and the shape. It uses FanInInitReLULayer for the convolutional layers and allows adjusting weight initialization scale, log scope, and initialization normalization parameters.",
+ "type": "comment"
+ },
+ "182": {
+ "file_id": 8,
+ "content": " kernel_size=3,\n padding=1,\n init_scale=s,\n log_scope=f\"{log_scope}/conv1\",\n **init_norm_kwargs,\n )\n def forward(self, x):\n x = x + self.conv1(self.conv0(x))\n return x\nclass CnnDownStack(nn.Module):\n \"\"\"\n Downsampling stack from Impala CNN.\n :param inchan: number of input channels\n :param nblock: number of residual blocks after downsampling\n :param outchan: number of output channels\n :param init_scale: weight init scale multiplier\n :param pool: if true, downsample with max pool\n :param post_pool_groups: if not None, normalize with group norm with this many groups\n :param kwargs: remaining kwargs are passed into the blocks and layers\n \"\"\"\n name = \"Impala_CnnDownStack\"\n def __init__(\n self,\n inchan: int,\n nblock: int,\n outchan: int,\n init_scale: float = 1,\n pool: bool = True,\n post_pool_groups: Optional[int] = None,\n log_scope: str = \"\",\n init_norm_kwargs: Dict = {},",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:43-78"
+ },
+ "183": {
+ "file_id": 8,
+ "content": "This code defines two classes: `ImpalaCnnConv1d` and `CnnDownStack`. The `ImpalaCnnConv1d` class represents a 1-dimensional convolutional layer with specific parameters, while the `CnnDownStack` class is a stack of downsampling blocks using the `ImpalaCnnConv1d` as the base. These classes are used for image classification tasks following the Impala CNN architecture.",
+ "type": "comment"
+ },
+ "184": {
+ "file_id": 8,
+ "content": " first_conv_norm=False,\n **kwargs,\n ):\n super().__init__()\n self.inchan = inchan\n self.outchan = outchan\n self.pool = pool\n first_conv_init_kwargs = deepcopy(init_norm_kwargs)\n if not first_conv_norm:\n first_conv_init_kwargs[\"group_norm_groups\"] = None\n first_conv_init_kwargs[\"batch_norm\"] = False\n self.firstconv = FanInInitReLULayer(\n inchan,\n outchan,\n kernel_size=3,\n padding=1,\n log_scope=f\"{log_scope}/firstconv\",\n **first_conv_init_kwargs,\n )\n self.post_pool_groups = post_pool_groups\n if post_pool_groups is not None:\n self.n = nn.GroupNorm(post_pool_groups, outchan)\n self.blocks = nn.ModuleList(\n [\n CnnBasicBlock(\n outchan,\n init_scale=init_scale / math.sqrt(nblock),\n log_scope=f\"{log_scope}/block{i}\",\n init_norm_kwargs=init_norm_kwargs,",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:79-107"
+ },
+ "185": {
+ "file_id": 8,
+ "content": "This code initializes a CNN architecture with optional group normalization. It takes parameters such as input and output channels, pooling size, and whether to use group normalization for the first convolution layer or not. The code also includes a list of blocks, where each block is an instance of CnnBasicBlock.",
+ "type": "comment"
+ },
+ "186": {
+ "file_id": 8,
+ "content": " **kwargs,\n )\n for i in range(nblock)\n ]\n )\n def forward(self, x):\n x = self.firstconv(x)\n if self.pool:\n x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)\n if self.post_pool_groups is not None:\n x = self.n(x)\n x = tu.sequential(self.blocks, x, diag_name=self.name)\n return x\n def output_shape(self, inshape):\n c, h, w = inshape\n assert c == self.inchan\n if self.pool:\n return (self.outchan, (h + 1) // 2, (w + 1) // 2)\n else:\n return (self.outchan, h, w)\nclass ImpalaCNN(nn.Module):\n \"\"\"\n :param inshape: input image shape (height, width, channels)\n :param chans: number of residual downsample stacks. Each element is the number of\n filters per convolution in the stack\n :param outsize: output hidden size\n :param nblock: number of residual blocks per stack. Each block has 2 convs and a residual\n :param init_norm_kwargs: arguments to be passed to convolutional layers. Options can be found",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:108-139"
+ },
+ "187": {
+ "file_id": 8,
+ "content": "This code defines a class for an ImpalaCNN model, which is a residual convolutional neural network. The constructor takes input image shape, number of residual downsample stacks, output hidden size, and number of residual blocks per stack as parameters. The forward method performs the forward pass through the network, and the output_shape method returns the expected output shape given the input shape.",
+ "type": "comment"
+ },
+ "188": {
+ "file_id": 8,
+ "content": " in ypt.model.util:FanInInitReLULayer\n :param dense_init_norm_kwargs: arguments to be passed to convolutional layers. Options can be found\n in ypt.model.util:FanInInitReLULayer\n :param kwargs: remaining kwargs are passed into the CnnDownStacks\n \"\"\"\n name = \"ImpalaCNN\"\n def __init__(\n self,\n inshape: List[int],\n chans: List[int],\n outsize: int,\n nblock: int,\n init_norm_kwargs: Dict = {},\n dense_init_norm_kwargs: Dict = {},\n first_conv_norm=False,\n **kwargs,\n ):\n super().__init__()\n h, w, c = inshape\n curshape = (c, h, w)\n self.stacks = nn.ModuleList()\n for i, outchan in enumerate(chans):\n stack = CnnDownStack(\n curshape[0],\n nblock=nblock,\n outchan=outchan,\n init_scale=math.sqrt(len(chans)),\n log_scope=f\"downstack{i}\",\n init_norm_kwargs=init_norm_kwargs,\n first_conv_norm=first_conv_norm if i == 0 else True,",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:140-171"
+ },
+ "189": {
+ "file_id": 8,
+ "content": "This code defines a class called \"ImpalaCNN\" which inherits from the base class. It takes in parameters such as input shape, number of channels, output size, number of blocks, initialization arguments for normalization layers, and additional keyword arguments. The class initializes a list of CNN downstack modules and sets their configurations based on the input parameters.",
+ "type": "comment"
+ },
+ "190": {
+ "file_id": 8,
+ "content": " **kwargs,\n )\n self.stacks.append(stack)\n curshape = stack.output_shape(curshape)\n self.dense = FanInInitReLULayer(\n misc.intprod(curshape),\n outsize,\n layer_type=\"linear\",\n log_scope=\"imapala_final_dense\",\n init_scale=1.4,\n **dense_init_norm_kwargs,\n )\n self.outsize = outsize\n def forward(self, x):\n b, t = x.shape[:-3]\n x = x.reshape(b * t, *x.shape[-3:])\n x = misc.transpose(x, \"bhwc\", \"bchw\")\n x = tu.sequential(self.stacks, x, diag_name=self.name)\n x = x.reshape(b, t, *x.shape[1:])\n x = tu.flatten_image(x)\n x = self.dense(x)\n return x",
+ "type": "code",
+ "location": "/lib/impala_cnn.py:172-195"
+ },
+ "191": {
+ "file_id": 8,
+ "content": "This code initializes a CNN model with multiple stacked 2D convolutional layers. The output of each stack is used as input to the next stack until the final dense layer for classification.",
+ "type": "comment"
+ },
+ "192": {
+ "file_id": 9,
+ "content": "/lib/masked_attention.py",
+ "type": "filepath"
+ },
+ "193": {
+ "file_id": 9,
+ "content": "The function develops a Masked Attention mechanism for time series data, incorporating parameters and considerations such as input size and mask type, and initializes an object for the masked attention based on these parameters. It defines a Masked Attention class with methods for state initialization, forward propagation, and handling causal masking, returning output and state information.",
+ "type": "summary"
+ },
+ "194": {
+ "file_id": 9,
+ "content": "import functools\nimport torch as th\nfrom torch import nn\nimport lib.xf as xf\nfrom lib.minecraft_util import store_args\nfrom lib.tree_util import tree_map\n@functools.lru_cache()\ndef get_band_diagonal_mask(t: int, T: int, maxlen: int, batchsize: int, device: th.device) -> th.Tensor:\n \"\"\"Returns a band diagonal mask which is causal (upper triangle is masked)\n and such that any frame can only view up to maxlen total past frames\n including the current frame.\n Example Masks: Here 0 means that frame is masked and we mask it by adding a huge number to the attention logits (see orc.xf)\n t = 3, T = 3, maxlen = 3\n T\n t 1 0 0 | mask out T > t\n 1 1 0 |\n 1 1 1 |\n t = 3, T = 6, maxlen = 3\n t 0 1 1 1 0 0 | mask out T > t\n 0 0 1 1 1 0 |\n 0 0 0 1 1 1 |\n Args:\n t: number of rows (presumably number of frames recieving gradient)\n T: number of cols (presumably t + past context that isn't being gradient updated)\n maxlen: maximum number of frames (including current frame) any frame can attend to",
+ "type": "code",
+ "location": "/lib/masked_attention.py:1-31"
+ },
+ "195": {
+ "file_id": 9,
+ "content": "This function returns a band diagonal mask for time series data, ensuring the attention is causal and limited to a specific maximum length. The mask is created based on the number of rows (frames receiving gradient) and columns (total frames including past context).",
+ "type": "comment"
+ },
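+ A small numeric example (matching the docstring case t=3, T=6, maxlen=3) of how the band-diagonal mask is built:
+ import torch as th
+ t, T, maxlen = 3, 6, 3
+ m = th.ones(t, T, dtype=bool)
+ m.tril_(T - t)                      # causal: each row cannot see columns past its own frame
+ m.triu_(T - t - maxlen + 1)         # each row sees at most maxlen frames including itself
+ print(m.int())
+ # tensor([[0, 1, 1, 1, 0, 0],
+ #         [0, 0, 1, 1, 1, 0],
+ #         [0, 0, 0, 1, 1, 1]])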
+ "196": {
+ "file_id": 9,
+ "content": " batchsize: number of masks to return\n device: torch device to place mask on\n Returns:\n Boolean mask of shape (batchsize, t, T)\n \"\"\"\n m = th.ones(t, T, dtype=bool)\n m.tril_(T - t) # Mask out upper triangle\n if maxlen is not None and maxlen < T: # Mask out lower triangle\n m.triu_(T - t - maxlen + 1)\n m_btT = m[None].repeat_interleave(batchsize, dim=0)\n m_btT = m_btT.to(device=device)\n return m_btT\ndef get_mask(first_b11: th.Tensor, state_mask: th.Tensor, t: int, T: int, maxlen: int, heads: int, device) -> th.Tensor:\n \"\"\"Returns a band diagonal mask that respects masking past states (columns 0:T-t inclusive)\n if first_b11 is True. See get_band_diagonal_mask for how the base mask is computed.\n This function takes that mask and first zeros out any past context if first_b11 is True.\n Say our context is in chunks of length t (so here T = 4t). We see that in the second batch we recieved first=True\n context t t t t\n first F T F F",
+ "type": "code",
+ "location": "/lib/masked_attention.py:32-54"
+ },
+ "197": {
+ "file_id": 9,
+ "content": "This function takes the masked_attention function from Video-Pre-Training/lib/masked_attention.py and generates a Boolean mask of shape (batchsize, t, T) based on the given parameters. The mask will have the upper triangle and lower triangle (if maxlen is not None) masked out. The get_mask function takes additional parameters and returns a band diagonal mask that respects the masking past states if first_b11 is True, by zeros any past context.",
+ "type": "comment"
+ },
+ "198": {
+ "file_id": 9,
+ "content": " Now, given this the mask should mask out anything prior to T < t; however since we don't have access to the past first_b11's\n we need to keep a state of the mask at those past timesteps. This is what state_mask is.\n In particular state_mask is a [b, t, T - t] mask matrix that contains the mask for the past T - t frames.\n Args: (See get_band_diagonal_mask for remaining args)\n first_b11: boolean tensor with shape [batchsize, 1, 1] indicating if the first timestep for each batch element had first=True\n state_mask: mask tensor of shape [b, t, T - t]\n t: number of mask rows (presumably number of frames for which we take gradient)\n T: number of mask columns (t + the number of past frames we keep in context)\n maxlen: actual context length\n heads: number of attention heads\n device: torch device\n Returns:\n m_btT: Boolean mask of shape (batchsize * heads, t, T)\n state_mask: updated state_mask\n \"\"\"\n b = first_b11.shape[0]",
+ "type": "code",
+ "location": "/lib/masked_attention.py:55-73"
+ },
+ "199": {
+ "file_id": 9,
+ "content": "This function receives various inputs including `first_b11`, `state_mask`, `t`, `T`, `maxlen`, `heads`, and `device`. It will return a Boolean mask of shape (batchsize * heads, t, T) and an updated state_mask. The purpose of this function is to update the state_mask based on the given inputs for the masked attention mechanism.",
+ "type": "comment"
+ }
+}
\ No newline at end of file
diff --git a/docs/data/2.json b/docs/data/2.json
new file mode 100644
index 0000000..46885e1
--- /dev/null
+++ b/docs/data/2.json
@@ -0,0 +1,545 @@
+{
+ "200": {
+ "file_id": 9,
+ "content": " if state_mask is None:\n state_mask = th.zeros((b, 1, T - t), dtype=bool, device=device)\n m_btT = get_band_diagonal_mask(t, T, maxlen, b, device).clone() # Should be shape B, t, T\n not_first = ~first_b11.to(device=device)\n m_btT[:, :, :-t] &= not_first # Zero out anything in the past if first is true\n m_btT[:, :, :-t] &= state_mask\n m_bhtT = m_btT[:, None].repeat_interleave(heads, dim=1)\n m_btT = m_bhtT.reshape((b * heads), t, T)\n # Update state_mask such that it reflects the most recent first\n state_mask = th.cat(\n [\n state_mask[:, :, t:] & not_first,\n th.ones((b, 1, min(t, T - t)), dtype=bool, device=device),\n ],\n dim=-1,\n )\n return m_btT, state_mask\nclass MaskedAttention(nn.Module):\n \"\"\"\n Transformer self-attention layer that removes frames from previous episodes from the hidden state under certain constraints.\n The constraints are:\n - The \"first\" flag can only be true for the first timestep of each batch. An assert will fire if other timesteps have first = True.",
+ "type": "code",
+ "location": "/lib/masked_attention.py:75-102"
+ },
+ "201": {
+ "file_id": 9,
+ "content": "This code is creating a mask for self-attention in transformer layers. It ensures that frames from previous episodes are not considered in the attention calculation for each episode. The mask is generated based on the \"first\" flag, which indicates if it's the first timestep of each batch, and the state_mask to exclude past frames.",
+ "type": "comment"
+ },
+ "202": {
+ "file_id": 9,
+ "content": " input_size: The dimension of the input (which also happens to be the size of the output)\n memory_size: The number of frames to keep in the inner state. Note that when attending, we will be able to attend\n to both the frames in the inner state (which presumably won't have gradients anymore) and the frames\n in the batch. \"mask\" for some additional considerations on this.\n heads: The number of attention heads to use. Note that we will split the input into this number of heads, so\n input_size needs to be divisible by heads.\n timesteps: number of timesteps with which we'll be taking gradient\n mask: Can be \"none\" or \"clipped_causal\". \"clipped_causal\" is a normal causal mask but solves the following minor problem:\n if you have a state of length 128 and a batch of 128 frames, then the first frame of your batch will be able to\n attend to 128 previous frames, but the last one will be able to attend to 255 previous frames. In this example,",
+ "type": "code",
+ "location": "/lib/masked_attention.py:104-113"
+ },
+ "203": {
+ "file_id": 9,
+ "content": "The code is describing the parameters and considerations of a masked attention mechanism. The input size, memory size, number of heads, timesteps, and mask are explained. The memory size allows attending to both inner state frames and batch frames, while the mask option handles potential imbalances between the first and last frames' attending capabilities.",
+ "type": "comment"
+ },
+ "204": {
+ "file_id": 9,
+ "content": " \"clipped_causal\" will make it so that the last frame can only attend to 128 previous frames, so that there is no\n bias coming from the position in the batch. None simply allows you to attend to any frame in the state + batch,\n which means you can also attend to future frames.\n \"\"\"\n @store_args\n def __init__(\n self,\n input_size,\n memory_size: int,\n heads: int,\n timesteps: int,\n mask: str = \"clipped_causal\",\n init_scale=1,\n norm=\"none\",\n log_scope=\"sa\",\n use_muP_factor=False,\n ):\n super().__init__()\n assert mask in {\"none\", \"clipped_causal\"}\n assert memory_size >= 0\n self.maxlen = memory_size - timesteps\n if mask == \"none\":\n mask = None\n self.orc_attn = xf.All2All(heads, self.maxlen, mask=mask is not None)\n self.orc_block = xf.SelfAttentionLayer(\n input_size,\n self.orc_attn,\n scale=init_scale,\n relattn=True,\n cache_keep_len=self.maxlen,",
+ "type": "code",
+ "location": "/lib/masked_attention.py:114-147"
+ },
+ "205": {
+ "file_id": 9,
+ "content": "The function initializes an object for masked attention. It takes in parameters such as input size, memory size, number of heads, timesteps, and a mask option ('clipped_causal' or 'none'). The maximum length is calculated based on the memory size and timesteps. If the mask option is set to 'none', the mask parameter is set to None. An All2All object for attention is created with heads, maxlen, and the mask value. Finally, a SelfAttentionLayer object is initialized with input size, the All2All attention object, and other parameters such as scale, relattn, and cache_keep_len set accordingly.",
+ "type": "comment"
+ },
+ "206": {
+ "file_id": 9,
+ "content": " norm=norm,\n log_scope=log_scope,\n use_muP_factor=use_muP_factor,\n )\n def initial_state(self, batchsize: int, device=None):\n \"\"\"Return the initial state mask (None) and the initial state of the transformer (zerod out keys and queries)\"\"\"\n state = self.orc_block.initial_state(batchsize, initial_T=self.maxlen)\n state_mask = None\n if device is not None:\n state = tree_map(lambda x: x.to(device), state)\n return state_mask, state\n def forward(self, input_bte, first_bt, state):\n \"\"\"Forward propagation of a single layer\"\"\"\n state_mask, xf_state = state\n t = first_bt.shape[1]\n if self.mask == \"clipped_causal\":\n new_mask, state_mask = get_mask(\n first_b11=first_bt[:, [[0]]],\n state_mask=state_mask,\n t=t,\n T=t + self.maxlen,\n maxlen=self.maxlen,\n heads=self.heads,\n device=input_bte.device,",
+ "type": "code",
+ "location": "/lib/masked_attention.py:148-173"
+ },
+ "207": {
+ "file_id": 9,
+ "content": "This code defines a class for Masked Attention, which has methods for initializing the state, forward propagation of a single layer, and defining the mask type. The initial_state method returns the initial state mask (None) and the initial state of the transformer with keys and queries zeros out. The forward method performs forward propagation of a single layer using the input, first_bt, and state as inputs. If the mask type is \"clipped_causal\", it applies a specific mask to the input.",
+ "type": "comment"
+ },
+ "208": {
+ "file_id": 9,
+ "content": " )\n self.orc_block.attn.mask = new_mask\n output, xf_state = self.orc_block(input_bte, xf_state)\n return output, (state_mask, xf_state)\n def get_log_keys(self):\n # These are logged in xf.SelfAttentionLayer\n return [f\"activation_{stat}/{self.log_scope}/{k}\" for k in [\"K\", \"Q\", \"V\", \"A\", \"Aproj\"] for stat in [\"mean\", \"std\"]]",
+ "type": "code",
+ "location": "/lib/masked_attention.py:174-182"
+ },
+ "209": {
+ "file_id": 9,
+ "content": "This code is defining a method in the class and returning comments for the code block. The method seems to be related to attention mechanism, where it applies masking to the input and returns output and state information. The log keys are defined as well for further logging purposes.",
+ "type": "comment"
+ },
+ "210": {
+ "file_id": 10,
+ "content": "/lib/minecraft_util.py",
+ "type": "filepath"
+ },
+ "211": {
+ "file_id": 10,
+ "content": "The code uses a decorator function to compute normalized entropy from categorical head outputs, considering masks and ignoring single-option cases. It also calculates the entropy of categorical and diagonal Gaussian action heads within a module by iterating over key-value pairs and returns average entropy.",
+ "type": "summary"
+ },
+ "212": {
+ "file_id": 10,
+ "content": "import functools\nimport inspect\nfrom typing import Optional, Tuple\nimport numpy as np\nimport torch\nfrom lib.action_head import (CategoricalActionHead, DiagGaussianActionHead,\n DictActionHead)\ndef store_args(method):\n \"\"\"Stores provided method args as instance attributes.\"\"\"\n argspec = inspect.getfullargspec(method)\n defaults = {}\n if argspec.defaults is not None:\n defaults = dict(zip(argspec.args[-len(argspec.defaults) :], argspec.defaults))\n if argspec.kwonlydefaults is not None:\n defaults.update(argspec.kwonlydefaults)\n arg_names = argspec.args[1:]\n @functools.wraps(method)\n def wrapper(*positional_args, **keyword_args):\n self = positional_args[0]\n # Get default arg values\n args = defaults.copy()\n # Add provided arg values\n for name, value in zip(arg_names, positional_args[1:]):\n args[name] = value\n args.update(keyword_args)\n self.__dict__.update(args)\n return method(*positional_args, **keyword_args)",
+ "type": "code",
+ "location": "/lib/minecraft_util.py:1-32"
+ },
+ "213": {
+ "file_id": 10,
+ "content": "This code defines a decorator function `store_args` that takes a method as input, and when the decorated method is called, it stores its arguments as instance attributes of the class. It also handles default argument values and keyword-only arguments.",
+ "type": "comment"
+ },
+ "214": {
+ "file_id": 10,
+ "content": " return wrapper\ndef get_norm_entropy_from_cat_head(module, name, masks, logits):\n # Note that the mask has already been applied to the logits at this point\n entropy = -torch.sum(torch.exp(logits) * logits, dim=-1)\n if name in masks:\n n = torch.sum(masks[name], dim=-1, dtype=torch.float)\n norm_entropy = entropy / torch.log(n)\n # When the mask only allows one option the normalized entropy makes no sense\n # as it is basically both maximal (the distribution is as uniform as it can be)\n # and minimal (there is no variance at all).\n # A such, we ignore them for purpose of calculating entropy.\n zero = torch.zeros_like(norm_entropy)\n norm_entropy = torch.where(n.eq(1.0), zero, norm_entropy)\n count = n.not_equal(1.0).int()\n else:\n n = torch.tensor(logits.shape[-1], dtype=torch.float)\n norm_entropy = entropy / torch.log(n)\n count = torch.ones_like(norm_entropy, dtype=torch.int)\n # entropy is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it.",
+ "type": "code",
+ "location": "/lib/minecraft_util.py:34-55"
+ },
+ "215": {
+ "file_id": 10,
+ "content": "This code calculates the normalized entropy from categorical head outputs and applies a mask if necessary. It divides the entropy by the log of the number of possible options, ignoring cases where only one option is available to avoid nonsense results. The count variable keeps track of how many times the condition for ignoring an option has been met.",
+ "type": "comment"
+ },
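+ A short sketch (assumed toy logits, no mask) of the normalization applied above:
+ import torch
+ logits = torch.log_softmax(torch.randn(4, 6), dim=-1)   # per-entry log-probabilities
+ entropy = -(logits.exp() * logits).sum(dim=-1)
+ n = torch.tensor(float(logits.shape[-1]))
+ norm_entropy = entropy / torch.log(n)                   # 1.0 would mean a perfectly uniform distribution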
+ "216": {
+ "file_id": 10,
+ "content": " for _ in module.output_shape[:-1]:\n norm_entropy = norm_entropy.sum(dim=-1)\n count = count.sum(dim=-1)\n return norm_entropy, count\ndef get_norm_cat_entropy(module, masks, logits, template) -> Tuple[torch.Tensor, torch.Tensor]:\n entropy_sum = torch.zeros_like(template, dtype=torch.float)\n counts = torch.zeros_like(template, dtype=torch.int)\n for k, subhead in module.items():\n if isinstance(subhead, DictActionHead):\n entropy, count = get_norm_cat_entropy(subhead, masks, logits[k], template)\n elif isinstance(subhead, CategoricalActionHead):\n entropy, count = get_norm_entropy_from_cat_head(subhead, k, masks, logits[k])\n else:\n continue\n entropy_sum += entropy\n counts += count\n return entropy_sum, counts\ndef get_diag_guassian_entropy(module, logits, template) -> Optional[torch.Tensor]:\n entropy_sum = torch.zeros_like(template, dtype=torch.float)\n count = torch.zeros(1, device=template.device, dtype=torch.int)",
+ "type": "code",
+ "location": "/lib/minecraft_util.py:56-79"
+ },
+ "217": {
+ "file_id": 10,
+ "content": "This code calculates the entropy of categorical and diagonal Gaussian action heads in a given module and returns the total entropy and counts.",
+ "type": "comment"
+ },
+ "218": {
+ "file_id": 10,
+ "content": " for k, subhead in module.items():\n if isinstance(subhead, DictActionHead):\n entropy_sum += get_diag_guassian_entropy(subhead, logits[k], template)\n elif isinstance(subhead, DiagGaussianActionHead):\n entropy_sum += module.entropy(logits)\n else:\n continue\n count += 1\n return entropy_sum / count",
+ "type": "code",
+ "location": "/lib/minecraft_util.py:80-88"
+ },
+ "219": {
+ "file_id": 10,
+ "content": "Iterates over each key-value pair in the module, adds entropy from DiagGaussianActionHead or DictActionHead to entropy_sum, and returns the average entropy.",
+ "type": "comment"
+ },
+ "220": {
+ "file_id": 11,
+ "content": "/lib/misc.py",
+ "type": "filepath"
+ },
+ "221": {
+ "file_id": 11,
+ "content": "Both comments discuss data processing tasks, with Comment A focusing on calculating and dividing products in a list 'x', while Comment B describes a function for reshaping input data, considering exceptions and undo functions, and utilizing a 'known' dictionary for shape inference.",
+ "type": "summary"
+ },
+ "222": {
+ "file_id": 11,
+ "content": "import numpy as np\nimport torch as th\ndef intprod(xs):\n \"\"\"\n Product of a sequence of integers\n \"\"\"\n out = 1\n for x in xs:\n out *= x\n return out\ndef safezip(*args):\n \"\"\"\n Check that lengths of sequences are the same, then zip them\n \"\"\"\n args = [list(a) for a in args]\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, f\"length mismatch: {list(map(len, args))}\"\n return list(zip(*args))\ndef transpose(x, before, after):\n \"\"\"\n Usage: x_bca = transpose(x_abc, 'abc', 'bca')\n \"\"\"\n assert sorted(before) == sorted(after), f\"cannot transpose {before} to {after}\"\n assert x.ndim == len(\n before\n ), f\"before spec '{before}' has length {len(before)} but x has {x.ndim} dimensions: {tuple(x.shape)}\"\n return x.permute(tuple(before.index(i) for i in after))\ndef transpose_undo(x, before, after, *, undo=None):\n \"\"\"\n Usage:\n x_bca, undo = transpose_undo(x_abc, 'abc', 'bca')\n x_bca = fully_connected_layer(x_bca)\n x_abc = undo(x_bca)\n \"\"\"",
+ "type": "code",
+ "location": "/lib/misc.py:1-43"
+ },
+ "223": {
+ "file_id": 11,
+ "content": "The code contains several functions related to data manipulation, such as calculating the product of a sequence of integers (`intprod`), checking and zipping lengths of sequences (`safezip`), transposing data with given before and after specifications (`transpose`), and undoing a data transposition (`transpose_undo`).",
+ "type": "comment"
+ },
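A small usage sketch of the helpers above; the concrete shapes are arbitrary choices for illustration.

```python
# Illustrative calls to intprod, safezip, and transpose from lib/misc.py.
import torch as th
from lib.misc import intprod, safezip, transpose

x_abc = th.zeros(2, 3, 4)
x_bca = transpose(x_abc, "abc", "bca")   # permutes to shape (3, 4, 2)
assert intprod(x_bca.shape) == 24
pairs = safezip(["a", "b"], [1, 2])      # [('a', 1), ('b', 2)]; raises on length mismatch
print(tuple(x_bca.shape), pairs)
```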
+ "224": {
+ "file_id": 11,
+ "content": " return (\n transpose(x, before, after),\n compose_undo(undo, lambda x: transpose(x, before=after, after=before)),\n )\ndef compose_undo(u1, u2):\n assert u2 is not None\n if u1 is None:\n return u2\n def u(x):\n x = u2(x)\n x = u1(x)\n return x\n return u\nNO_BIND = \"__nobind\"\ndef _parse_reshape_str(s, kind):\n assert kind in (\"before\", \"after\")\n result = []\n n_underscores = 0\n for i, part in enumerate(s.split(\",\")):\n part = part.strip()\n if part == \"?\" and kind == \"before\":\n result.append([f\"__{i}\"])\n elif part == \"_\":\n result.append([f\"{NO_BIND}_{n_underscores}\"])\n n_underscores += 1\n else:\n result.append([term.strip() for term in part.split(\"*\")])\n return result\ndef _infer_part(part, concrete_dim, known, index, full_shape):\n if type(part) is int:\n return part\n assert isinstance(part, list), part\n lits = []\n syms = []\n for term in part:\n if type(term) is int:",
+ "type": "code",
+ "location": "/lib/misc.py:44-89"
+ },
+ "225": {
+ "file_id": 11,
+ "content": "Function `transpose` takes an input tensor, and a list of tuples specifying the axes to permute. It returns the transposed tensor and a function that undoes the transpose operation.\nFunction `compose_undo` combines two transformation functions into a single one that applies them in reverse order. If either is None, it simply returns the other. Otherwise, it creates an anonymous function that first applies the second transformation, then the first, and finally returns the result.\nString `NO_BIND` is used as a placeholder when a dimension cannot be bound to a specific variable.\nFunction `_parse_reshape_str` parses a string of the form \"x,*y,?z\" where x, y, and z are integers or '?' symbols. It returns a list containing three lists: the first contains '?' characters for 'before', '_' characters for 'after', and actual numbers for 'none'. The second contains actual numbers for 'before', and the third contains actual numbers for 'after'.\nFunction `_infer_part` infers the part of the tensor shape to be used based on the type of the input. If it is an integer, it returns that integer. Otherwise, it processes a list of terms, handling integers and strings containing '*' symbols differently.",
+ "type": "comment"
+ },
+ "226": {
+ "file_id": 11,
+ "content": " lits.append(term)\n elif type(term) is str:\n syms.append(term)\n else:\n raise TypeError(f\"got {type(term)} but expected int or str\")\n int_part = 1\n for x in lits:\n int_part *= x\n if len(syms) == 0:\n return int_part\n elif len(syms) == 1 and concrete_dim is not None:\n assert concrete_dim % int_part == 0, f\"{concrete_dim} % {int_part} != 0 (at index {index}, full shape is {full_shape})\"\n v = concrete_dim // int_part\n if syms[0] in known:\n assert (\n known[syms[0]] == v\n ), f\"known value for {syms[0]} is {known[syms[0]]} but found value {v} at index {index} (full shape is {full_shape})\"\n else:\n known[syms[0]] = v\n return concrete_dim\n else:\n for i in range(len(syms)):\n if syms[i] in known:\n syms[i] = known[syms[i]]\n else:\n try:\n syms[i] = int(syms[i])\n except ValueError:\n pass",
+ "type": "code",
+ "location": "/lib/misc.py:90-118"
+ },
+ "227": {
+ "file_id": 11,
+ "content": "This function takes a term, checks if it's an int or str, and performs calculations based on the input type. If int, it multiplies all literals (int or float) and returns the result. If str, it checks if there's only one symbol and concrete_dim is given. It asserts that concrete_dim is divisible by int_part and calculates v. If the symbol is already in known values, it asserts the known value matches. If not, it adds the symbol to known with its calculated value. Finally, if there are multiple symbols, it iterates through them and converts strings to ints.",
+ "type": "comment"
+ },
+ "228": {
+ "file_id": 11,
+ "content": " return lits + syms\ndef _infer_step(args):\n known, desc, shape = args\n new_known = known.copy()\n new_desc = desc.copy()\n for i in range(len(desc)):\n if shape is None:\n concrete_dim = None\n else:\n concrete_dim = shape[i]\n new_desc[i] = _infer_part(part=desc[i], concrete_dim=concrete_dim, known=new_known, index=i, full_shape=shape)\n return new_known, new_desc, shape\ndef _infer(known, desc, shape):\n if shape is not None:\n assert len(desc) == len(shape), f\"desc has length {len(desc)} but shape has length {len(shape)} (shape={shape})\"\n known, desc, shape = fixed_point(_infer_step, (known, desc, shape))\n return desc, known\ndef fixed_point(f, x, eq=None):\n if eq is None:\n eq = lambda a, b: a == b\n while True:\n new_x = f(x)\n if eq(x, new_x):\n return x\n else:\n x = new_x\ndef _infer_question_mark(x, total_product):\n try:\n question_mark_index = x.index([\"?\"])\n except ValueError:\n return x",
+ "type": "code",
+ "location": "/lib/misc.py:119-157"
+ },
+ "229": {
+ "file_id": 11,
+ "content": "This function takes an existing list `lits` and a symbol `syms` and returns a new list where the `syms` occur after all elements in `lits`. The `_infer_step()` function takes known values, description, and shape as arguments. It creates copies of the new known and description lists and loops through each element in the description list. If a specific shape is provided, it assigns the corresponding dimension to `concrete_dim`. Then, it calls `_infer_part()` with the part, concrete dimension, known values, index, and full shape as arguments. The function returns the new known values, description list, and shape. The `fixed_point()` function uses a lambda function to check for equality between two inputs. It continues to apply the given function to the input until it reaches a fixed point where the input remains unchanged. Lastly, `_infer_question_mark()` function tries to find the index of \"?\" in the list and returns the list if found.",
+ "type": "comment"
+ },
+ "230": {
+ "file_id": 11,
+ "content": " observed_product = 1\n for i in range(len(x)):\n if i != question_mark_index:\n assert type(x[i]) is int, f\"when there is a question mark, there can be no other unknown values (full list: {x})\"\n observed_product *= x[i]\n assert (\n observed_product and total_product % observed_product == 0\n ), f\"{total_product} is not divisible by {observed_product}\"\n value = total_product // observed_product\n x = x.copy()\n x[question_mark_index] = value\n return x\ndef _ground(x, known, infer_question_mark_with=None):\n x, known = _infer(known=known, desc=x, shape=None)\n if infer_question_mark_with:\n x = _infer_question_mark(x, infer_question_mark_with)\n for part in x:\n assert type(part) is int, f\"cannot infer value of {part}\"\n return x\ndef _handle_ellipsis(x, before, after):\n ell = [\"...\"]\n try:\n i = before.index(ell)\n l = len(x.shape) - len(before) + 1\n ellipsis_value = x.shape[i : i + l]\n ellipsis_value = list(ellipsis_value)",
+ "type": "code",
+ "location": "/lib/misc.py:158-187"
+ },
+ "231": {
+ "file_id": 11,
+ "content": "- Calculate product of known values in list 'x'\n- Assert that the total product is divisible by observed product and return error message if not\n- Update list 'x' with calculated value for the question mark index and return it",
+ "type": "comment"
+ },
+ "232": {
+ "file_id": 11,
+ "content": " before = before[:i] + ellipsis_value + before[i + 1 :]\n except ValueError:\n pass\n try:\n i = after.index(ell)\n after = after[:i] + ellipsis_value + after[i + 1 :]\n except ValueError:\n pass\n except UnboundLocalError as e:\n raise ValueError(\"there cannot be an ellipsis in 'after' unless there is an ellipsis in 'before'\") from e\n return before, after\ndef reshape_undo(inp, before, after, *, undo=None, known=None, **kwargs):\n \"\"\"\n Usage:\n x_Bhwse, undo = reshape_undo(\n x_bthwe,\n 'b, t, ..., stride*e',\n 'b*t, ..., stride, e',\n stride=7\n )\n x_Bhwse = do_some_stuff(x_Bhwse)\n x_bthwe = undo(x_Bhwse)\n It's necessary to pass known values as keywords only\n when they can't be inferred from the shape.\n (Eg. in the above example we needed to pass\n stride but not b, t, or e, since those can be determined from\n inp.shape once stride is known.)\n \"\"\"\n if known:\n known = {**kwargs, **known}\n else:\n known = kwargs",
+ "type": "code",
+ "location": "/lib/misc.py:188-223"
+ },
+ "233": {
+ "file_id": 11,
+ "content": "The code performs shape reshaping operations and handles any exceptions that may occur during the process. It takes input, initial 'before' and 'after' shapes as arguments, and optional 'undo' and 'known' parameters. If 'known' is provided, it becomes a dictionary of known values to help with shape inference. The function returns reshaped input and an 'undo' function for reverting the reshape operation.",
+ "type": "comment"
+ },
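A concrete sketch of the `reshape_undo` call from its docstring; the example dimensions are chosen here for illustration and are not from the repository.

```python
# Worked example of reshape_undo; stride=7 must be supplied because it cannot be
# inferred from the shape, while b, t, and e are inferred automatically.
import torch as th
from lib.misc import reshape_undo

x_bthwe = th.zeros(2, 3, 8, 8, 14)   # last dim is stride*e = 7*2
x_Bhwse, undo = reshape_undo(
    x_bthwe, "b, t, ..., stride*e", "b*t, ..., stride, e", stride=7
)
print(tuple(x_Bhwse.shape))          # (6, 8, 8, 7, 2)
assert undo(x_Bhwse).shape == x_bthwe.shape
```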
+ "234": {
+ "file_id": 11,
+ "content": " assert type(before) is type(after), f\"{type(before)} != {type(after)}\"\n assert isinstance(inp, (th.Tensor, np.ndarray)), f\"require tensor or ndarray but got {type(inp)}\"\n assert isinstance(before, (str, list)), f\"require str or list but got {type(before)}\"\n if isinstance(before, str):\n before = _parse_reshape_str(before, \"before\")\n after = _parse_reshape_str(after, \"after\")\n before, after = _handle_ellipsis(inp, before, after)\n before_saved, after_saved = before, after\n before, known = _infer(known=known, desc=before, shape=inp.shape)\n before = _ground(before, known, product(inp.shape))\n after = _ground(after, known, product(inp.shape))\n known = {k: v for k, v in known.items() if not k.startswith(NO_BIND)}\n assert tuple(inp.shape) == tuple(before), f\"expected shape {before} but got shape {inp.shape}\"\n assert product(inp.shape) == product(\n after\n ), f\"cannot reshape {inp.shape} to {after} because the number of elements does not match\"\n return (",
+ "type": "code",
+ "location": "/lib/misc.py:224-240"
+ },
+ "235": {
+ "file_id": 11,
+ "content": "Ensures input types are correct and parses reshape string if input is a string. Infers the shape of the input, grounds it, and removes any bindings marked with NO_BIND. Asserts that the shapes match and returns the result.",
+ "type": "comment"
+ },
+ "236": {
+ "file_id": 11,
+ "content": " inp.reshape(after),\n compose_undo(undo, lambda inp: reshape(inp, after_saved, before_saved, known=known)),\n )\ndef reshape(*args, **kwargs):\n \"\"\"\n Please see the documenation for reshape_undo.\n \"\"\"\n x, _ = reshape_undo(*args, **kwargs)\n return x\ndef product(xs, one=1):\n result = one\n for x in xs:\n result = result * x\n return result\ndef exact_div(a, b):\n assert a % b == 0, f\"{a} is not divisible by {b}\"\n return a // b",
+ "type": "code",
+ "location": "/lib/misc.py:241-263"
+ },
+ "237": {
+ "file_id": 11,
+ "content": "The code contains functions for reshaping arrays, calculating products of a list of numbers, and performing an exact division.",
+ "type": "comment"
+ },
+ "238": {
+ "file_id": 12,
+ "content": "/lib/mlp.py",
+ "type": "filepath"
+ },
+ "239": {
+ "file_id": 12,
+ "content": "MLP class defines a neural network with specified input, hidden, and output layers. It uses normed linear layers and applies the specified activation function to hidden layers.",
+ "type": "summary"
+ },
+ "240": {
+ "file_id": 12,
+ "content": "import torch as th\nfrom torch import nn\nfrom lib import misc\nfrom lib import torch_util as tu\nclass MLP(nn.Module):\n def __init__(self, insize, nhidlayer, outsize, hidsize, hidactiv, dtype=th.float32):\n super().__init__()\n self.insize = insize\n self.nhidlayer = nhidlayer\n self.outsize = outsize\n in_sizes = [insize] + [hidsize] * nhidlayer\n out_sizes = [hidsize] * nhidlayer + [outsize]\n self.layers = nn.ModuleList(\n [tu.NormedLinear(insize, outsize, dtype=dtype) for (insize, outsize) in misc.safezip(in_sizes, out_sizes)]\n )\n self.hidactiv = hidactiv\n def forward(self, x):\n *hidlayers, finallayer = self.layers\n for layer in hidlayers:\n x = layer(x)\n x = self.hidactiv(x)\n x = finallayer(x)\n return x\n @property\n def output_shape(self):\n return (self.outsize,)",
+ "type": "code",
+ "location": "/lib/mlp.py:1-31"
+ },
+ "241": {
+ "file_id": 12,
+ "content": "MLP class defines a neural network with specified input, hidden, and output layers. It uses normed linear layers and applies the specified activation function to hidden layers.",
+ "type": "comment"
+ },
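A hypothetical instantiation of the MLP class; the sizes and activation below are illustrative.

```python
# Two hidden layers of width 128 mapping 64 features to 10 outputs.
import torch as th
from lib.mlp import MLP

net = MLP(insize=64, nhidlayer=2, outsize=10, hidsize=128,
          hidactiv=th.nn.functional.relu)
out = net(th.randn(5, 64))
print(tuple(out.shape), net.output_shape)   # (5, 10) (10,)
```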
+ "242": {
+ "file_id": 13,
+ "content": "/lib/normalize_ewma.py",
+ "type": "filepath"
+ },
+ "243": {
+ "file_id": 13,
+ "content": "The NormalizeEwma module normalizes data across dimensions, calculates debiased mean and variance, and provides methods for normalization and denormalization. It maintains running mean and variance for input vectors during training while avoiding backpropagation issues.",
+ "type": "summary"
+ },
+ "244": {
+ "file_id": 13,
+ "content": "import numpy as np\nimport torch\nimport torch.nn as nn\nclass NormalizeEwma(nn.Module):\n \"\"\"Normalize a vector of observations - across the first norm_axes dimensions\"\"\"\n def __init__(self, input_shape, norm_axes=2, beta=0.99999, per_element_update=False, epsilon=1e-5):\n super().__init__()\n self.input_shape = input_shape\n self.norm_axes = norm_axes\n self.epsilon = epsilon\n self.beta = beta\n self.per_element_update = per_element_update\n self.running_mean = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)\n self.running_mean_sq = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)\n self.debiasing_term = nn.Parameter(torch.tensor(0.0, dtype=torch.float), requires_grad=False)\n def reset_parameters(self):\n self.running_mean.zero_()\n self.running_mean_sq.zero_()\n self.debiasing_term.zero_()\n def running_mean_var(self):\n debiased_mean = self.running_mean / self.debiasing_term.clamp(min=self.epsilon)",
+ "type": "code",
+ "location": "/lib/normalize_ewma.py:1-28"
+ },
+ "245": {
+ "file_id": 13,
+ "content": "NormalizeEwma is an EWMA (Exponential Weighted Moving Average) normalization module for vectors of observations. It normalizes the data across specific dimensions, with optional per-element update and debiasing term.",
+ "type": "comment"
+ },
+ "246": {
+ "file_id": 13,
+ "content": " debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp(min=self.epsilon)\n debiased_var = (debiased_mean_sq - debiased_mean ** 2).clamp(min=1e-2)\n return debiased_mean, debiased_var\n def forward(self, input_vector):\n # Make sure input is float32\n input_vector = input_vector.to(torch.float)\n if self.training:\n # Detach input before adding it to running means to avoid backpropping through it on\n # subsequent batches.\n detached_input = input_vector.detach()\n batch_mean = detached_input.mean(dim=tuple(range(self.norm_axes)))\n batch_sq_mean = (detached_input ** 2).mean(dim=tuple(range(self.norm_axes)))\n if self.per_element_update:\n batch_size = np.prod(detached_input.size()[: self.norm_axes])\n weight = self.beta ** batch_size\n else:\n weight = self.beta\n self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight))\n self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight))",
+ "type": "code",
+ "location": "/lib/normalize_ewma.py:29-51"
+ },
+ "247": {
+ "file_id": 13,
+ "content": "This code calculates the debiased mean and variance of input vectors for each batch while training. It normalizes the input to float32, updates running means and squared means with detached inputs, and applies weighted averages to avoid backpropagation through subsequent batches.",
+ "type": "comment"
+ },
+ "248": {
+ "file_id": 13,
+ "content": " self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight))\n mean, var = self.running_mean_var()\n return (input_vector - mean[(None,) * self.norm_axes]) / torch.sqrt(var)[(None,) * self.norm_axes]\n def denormalize(self, input_vector):\n \"\"\"Transform normalized data back into original distribution\"\"\"\n mean, var = self.running_mean_var()\n return input_vector * torch.sqrt(var)[(None,) * self.norm_axes] + mean[(None,) * self.norm_axes]",
+ "type": "code",
+ "location": "/lib/normalize_ewma.py:52-60"
+ },
+ "249": {
+ "file_id": 13,
+ "content": "This class provides methods to normalize and denormalize data. It also maintains running mean and variance.",
+ "type": "comment"
+ },
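A round-trip sketch of NormalizeEwma under the default norm_axes=2 layout of (batch, time, features); the shapes and beta here are assumptions for illustration.

```python
# forward() normalizes with running EWMA statistics; denormalize() inverts it.
import torch
from lib.normalize_ewma import NormalizeEwma

norm = NormalizeEwma(input_shape=(8,), norm_axes=2, beta=0.99)
norm.train()
x = torch.randn(16, 4, 8) * 3.0 + 1.0   # unnormalized observations
y = norm(x)                             # updates running stats, then normalizes
print(torch.allclose(norm.denormalize(y), x, atol=1e-4))   # True
```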
+ "250": {
+ "file_id": 14,
+ "content": "/lib/policy.py",
+ "type": "filepath"
+ },
+ "251": {
+ "file_id": 14,
+ "content": "The code includes classes for image preprocessing, reinforcement learning with optional parameters, and a MinecraftAgentPolicy network using PyTorch neural networks. It handles policy decisions, actions, and probabilities in the policy network while utilizing 3D convolution layers for reinforcement learning models.",
+ "type": "summary"
+ },
+ "252": {
+ "file_id": 14,
+ "content": "from copy import deepcopy\nfrom email import policy\nfrom typing import Dict, Optional\nimport numpy as np\nimport torch as th\nfrom gym3.types import DictType\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib.action_head import make_action_head\nfrom lib.action_mapping import CameraHierarchicalMapping\nfrom lib.impala_cnn import ImpalaCNN\nfrom lib.normalize_ewma import NormalizeEwma\nfrom lib.scaled_mse_head import ScaledMSEHead\nfrom lib.tree_util import tree_map\nfrom lib.util import FanInInitReLULayer, ResidualRecurrentBlocks\nfrom lib.misc import transpose\nclass ImgPreprocessing(nn.Module):\n \"\"\"Normalize incoming images.\n :param img_statistics: remote path to npz file with a mean and std image. If specified\n normalize images using this.\n :param scale_img: If true and img_statistics not specified, scale incoming images by 1/255.\n \"\"\"\n def __init__(self, img_statistics: Optional[str] = None, scale_img: bool = True):\n super().__init__()\n self.img_mean = None\n if img_statistics is not None:",
+ "type": "code",
+ "location": "/lib/policy.py:1-32"
+ },
+ "253": {
+ "file_id": 14,
+ "content": "This code defines a class called \"ImgPreprocessing\" which is used to normalize incoming images. It has an optional parameter for img_statistics, a remote path to a npz file containing mean and std image values. If img_statistics is provided, the images are normalized using those values. Otherwise, if no img_statistics is provided but scale_img is True, the images are scaled by 1/255. The class inherits from nn.Module which allows it to be used as part of a neural network in PyTorch. The code also initializes an instance variable self.img_mean to None.",
+ "type": "comment"
+ },
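A quick sketch of the no-statistics path, where pixels are simply scaled by 1/255; the image shape is an arbitrary example.

```python
# Without an img_statistics file, forward() casts to float32 and divides by 255.
import torch as th
from lib.policy import ImgPreprocessing

prep = ImgPreprocessing(img_statistics=None, scale_img=True)
img = th.randint(0, 256, (1, 128, 128, 3), dtype=th.uint8)
x = prep(img)
print(x.dtype, float(x.max()) <= 1.0)   # torch.float32 True
```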
+ "254": {
+ "file_id": 14,
+ "content": " img_statistics = dict(**np.load(img_statistics))\n self.img_mean = nn.Parameter(th.Tensor(img_statistics[\"mean\"]), requires_grad=False)\n self.img_std = nn.Parameter(th.Tensor(img_statistics[\"std\"]), requires_grad=False)\n else:\n self.ob_scale = 255.0 if scale_img else 1.0\n def forward(self, img):\n x = img.to(dtype=th.float32)\n if self.img_mean is not None:\n x = (x - self.img_mean) / self.img_std\n else:\n x = x / self.ob_scale\n return x\nclass ImgObsProcess(nn.Module):\n \"\"\"ImpalaCNN followed by a linear layer.\n :param cnn_outsize: impala output dimension\n :param output_size: output size of the linear layer.\n :param dense_init_norm_kwargs: kwargs for linear FanInInitReLULayer\n :param init_norm_kwargs: kwargs for 2d and 3d conv FanInInitReLULayer\n \"\"\"\n def __init__(\n self,\n cnn_outsize: int,\n output_size: int,\n dense_init_norm_kwargs: Dict = {},\n init_norm_kwargs: Dict = {},",
+ "type": "code",
+ "location": "/lib/policy.py:33-62"
+ },
+ "255": {
+ "file_id": 14,
+ "content": "This code defines a class named \"ImgObsProcess\" which is a subclass of nn.Module used for preprocessing images and observations. It loads image statistics (mean and std) from a file or uses default scale values based on the provided \"scale_img\". The forward method normalizes the input image by subtracting mean and dividing by std if img_mean and img_std are not None, otherwise it divides by ob_scale. The class also accepts parameters for creating an instance of ImpalaCNN followed by a linear layer.",
+ "type": "comment"
+ },
+ "256": {
+ "file_id": 14,
+ "content": " **kwargs,\n ):\n super().__init__()\n self.cnn = ImpalaCNN(\n outsize=cnn_outsize,\n init_norm_kwargs=init_norm_kwargs,\n dense_init_norm_kwargs=dense_init_norm_kwargs,\n **kwargs,\n )\n self.linear = FanInInitReLULayer(\n cnn_outsize,\n output_size,\n layer_type=\"linear\",\n **dense_init_norm_kwargs,\n )\n def forward(self, img):\n return self.linear(self.cnn(img))\nclass MinecraftPolicy(nn.Module):\n \"\"\"\n :param recurrence_type:\n None - No recurrence, adds no extra layers\n lstm - (Depreciated). Singular LSTM\n multi_layer_lstm - Multi-layer LSTM. Uses n_recurrence_layers to determine number of consecututive LSTMs\n Does NOT support ragged batching\n multi_masked_lstm - Multi-layer LSTM that supports ragged batching via the first vector. This model is slower\n Uses n_recurrence_layers to determine number of consecututive LSTMs",
+ "type": "code",
+ "location": "/lib/policy.py:63-91"
+ },
+ "257": {
+ "file_id": 14,
+ "content": "This code defines a class called \"Policy\" with an initializer and a forward method. The initializer takes various parameters, creates an ImpalaCNN and FanInInitReLULayer layers, and initializes the CNN layer with given parameters. The forward method applies these layers to input images and returns the result.\nThe code also defines a class called \"MinecraftPolicy\" that extends nn.Module and takes recurrence_type as parameter. It doesn't have any methods defined.",
+ "type": "comment"
+ },
+ "258": {
+ "file_id": 14,
+ "content": " transformer - Dense transformer\n :param init_norm_kwargs: kwargs for all FanInInitReLULayers.\n \"\"\"\n def __init__(\n self,\n recurrence_type=\"lstm\",\n impala_width=1,\n impala_chans=(16, 32, 32),\n obs_processing_width=256,\n hidsize=512,\n single_output=False, # True if we don't need separate outputs for action/value outputs\n img_shape=None,\n scale_input_img=True,\n only_img_input=False,\n init_norm_kwargs={},\n impala_kwargs={},\n # Unused argument assumed by forc.\n input_shape=None, # pylint: disable=unused-argument\n active_reward_monitors=None,\n img_statistics=None,\n first_conv_norm=False,\n diff_mlp_embedding=False,\n attention_mask_style=\"clipped_causal\",\n attention_heads=8,\n attention_memory_size=2048,\n use_pointwise_layer=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n n_recurrence_layers=1,\n recurrence_is_residual=True,",
+ "type": "code",
+ "location": "/lib/policy.py:92-122"
+ },
+ "259": {
+ "file_id": 14,
+ "content": "This function is used to initialize an object of the class \"Policy\" which appears to be a deep learning model for reinforcement learning. The model can take both image and observation inputs, and uses a Dense transformer as part of its architecture. There are many optional parameters such as recurrence_type, impala_width, obs_processing_width, hidsize, single_output, img_shape, scale_input_img, only_img_input, init_norm_kwargs, impala_kwargs and more that can be used to customize the model.",
+ "type": "comment"
+ },
+ "260": {
+ "file_id": 14,
+ "content": " timesteps=None,\n use_pre_lstm_ln=True, # Not needed for transformer\n **unused_kwargs,\n ):\n super().__init__()\n assert recurrence_type in [\n \"multi_layer_lstm\",\n \"multi_layer_bilstm\",\n \"multi_masked_lstm\",\n \"transformer\",\n \"none\",\n ]\n active_reward_monitors = active_reward_monitors or {}\n self.single_output = single_output\n chans = tuple(int(impala_width * c) for c in impala_chans)\n self.hidsize = hidsize\n # Dense init kwargs replaces batchnorm/groupnorm with layernorm\n self.init_norm_kwargs = init_norm_kwargs\n self.dense_init_norm_kwargs = deepcopy(init_norm_kwargs)\n if self.dense_init_norm_kwargs.get(\"group_norm_groups\", None) is not None:\n self.dense_init_norm_kwargs.pop(\"group_norm_groups\", None)\n self.dense_init_norm_kwargs[\"layer_norm\"] = True\n if self.dense_init_norm_kwargs.get(\"batch_norm\", False):\n self.dense_init_norm_kwargs.pop(\"batch_norm\", False)",
+ "type": "code",
+ "location": "/lib/policy.py:123-150"
+ },
+ "261": {
+ "file_id": 14,
+ "content": "The code defines a class with an __init__ method that takes various arguments, including the recurrence_type, active_reward_monitors, single_output, impala_width, impala_chans, hidsize, init_norm_kwargs and timesteps. It performs an assertion on the recurrence_type, initializes some variables and dictionaries, and defines a few more attributes based on these arguments.",
+ "type": "comment"
+ },
+ "262": {
+ "file_id": 14,
+ "content": " self.dense_init_norm_kwargs[\"layer_norm\"] = True\n # Setup inputs\n self.img_preprocess = ImgPreprocessing(img_statistics=img_statistics, scale_img=scale_input_img)\n self.img_process = ImgObsProcess(\n cnn_outsize=256,\n output_size=hidsize,\n inshape=img_shape,\n chans=chans,\n nblock=2,\n dense_init_norm_kwargs=self.dense_init_norm_kwargs,\n init_norm_kwargs=init_norm_kwargs,\n first_conv_norm=first_conv_norm,\n **impala_kwargs,\n )\n self.pre_lstm_ln = nn.LayerNorm(hidsize) if use_pre_lstm_ln else None\n self.diff_obs_process = None\n self.recurrence_type = recurrence_type\n self.recurrent_layer = None\n self.recurrent_layer = ResidualRecurrentBlocks(\n hidsize=hidsize,\n timesteps=timesteps,\n recurrence_type=recurrence_type,\n is_residual=recurrence_is_residual,\n use_pointwise_layer=use_pointwise_layer,",
+ "type": "code",
+ "location": "/lib/policy.py:151-178"
+ },
+ "263": {
+ "file_id": 14,
+ "content": "Initializing layer norm for dense layers and setting up input processing components.",
+ "type": "comment"
+ },
+ "264": {
+ "file_id": 14,
+ "content": " pointwise_ratio=pointwise_ratio,\n pointwise_use_activation=pointwise_use_activation,\n attention_mask_style=attention_mask_style,\n attention_heads=attention_heads,\n attention_memory_size=attention_memory_size,\n n_block=n_recurrence_layers,\n )\n self.lastlayer = FanInInitReLULayer(hidsize, hidsize, layer_type=\"linear\", **self.dense_init_norm_kwargs)\n self.final_ln = th.nn.LayerNorm(hidsize)\n def output_latent_size(self):\n return self.hidsize\n def forward(self, ob, state_in, context):\n first = context[\"first\"]\n x = self.img_preprocess(ob[\"img\"])\n x = self.img_process(x)\n if self.diff_obs_process:\n processed_obs = self.diff_obs_process(ob[\"diff_goal\"])\n x = processed_obs + x\n if self.pre_lstm_ln is not None:\n x = self.pre_lstm_ln(x)\n if self.recurrent_layer is not None:\n x, state_out = self.recurrent_layer(x, first, state_in)\n else:",
+ "type": "code",
+ "location": "/lib/policy.py:179-208"
+ },
+ "265": {
+ "file_id": 14,
+ "content": "The code initializes a module with specified parameters including pointwise_ratio, pointwise_use_activation, attention_mask_style, attention_heads, attention_memory_size and n_block. Then it creates an instance of FanInInitReLULayer and LayerNorm for the last layer and final layer normalization respectively. It also defines a function output_latent_size to return the latent size, and another function forward which takes in observations, initial state, and context as input, performs image preprocessing and optional differential observation processing if specified, applies pre-LSTM normalization if present, then passes the processed data through the recurrent layer to obtain output x and updated state.",
+ "type": "comment"
+ },
+ "266": {
+ "file_id": 14,
+ "content": " state_out = state_in\n x = F.relu(x, inplace=False)\n x = self.lastlayer(x)\n x = self.final_ln(x)\n pi_latent = vf_latent = x\n if self.single_output:\n return pi_latent, state_out\n return (pi_latent, vf_latent), state_out\n def initial_state(self, batchsize):\n if self.recurrent_layer:\n return self.recurrent_layer.initial_state(batchsize)\n else:\n return None\nclass MinecraftAgentPolicy(nn.Module):\n def __init__(self, action_space, policy_kwargs, pi_head_kwargs):\n super().__init__()\n self.net = MinecraftPolicy(**policy_kwargs)\n self.action_space = action_space\n self.value_head = self.make_value_head(self.net.output_latent_size())\n self.pi_head = self.make_action_head(self.net.output_latent_size(), **pi_head_kwargs)\n def make_value_head(self, v_out_size: int, norm_type: str = \"ewma\", norm_kwargs: Optional[Dict] = None):\n return ScaledMSEHead(v_out_size, 1, norm_type=norm_type, norm_kwargs=norm_kwargs)",
+ "type": "code",
+ "location": "/lib/policy.py:209-238"
+ },
+ "267": {
+ "file_id": 14,
+ "content": "The code defines a class `MinecraftAgentPolicy` that inherits from `nn.Module`. It takes in an action space, policy kwargs, and pi_head kwargs as parameters during initialization. Inside the initialization, it creates a network `self.net` using `MinecraftPolicy`, a value head `self.value_head` using `make_value_head`, and a policy head `self.pi_head` using `make_action_head`. The code also defines a method `initial_state(batchsize)` that returns the initial state of the recurrent layer if it exists, otherwise it returns None.",
+ "type": "comment"
+ },
+ "268": {
+ "file_id": 14,
+ "content": " def make_action_head(self, pi_out_size: int, **pi_head_opts):\n return make_action_head(self.action_space, pi_out_size, **pi_head_opts)\n def initial_state(self, batch_size: int):\n return self.net.initial_state(batch_size)\n def reset_parameters(self):\n super().reset_parameters()\n self.net.reset_parameters()\n self.pi_head.reset_parameters()\n self.value_head.reset_parameters()\n def forward(self, obs, first: th.Tensor, state_in):\n if isinstance(obs, dict):\n # We don't want to mutate the obs input.\n obs = obs.copy()\n # If special \"mask\" key is in obs,\n # It's for masking the logits.\n # We take it out (the network doesn't need it)\n mask = obs.pop(\"mask\", None)\n else:\n mask = None\n (pi_h, v_h), state_out = self.net(obs, state_in, context={\"first\": first})\n pi_logits = self.pi_head(pi_h, mask=mask)\n vpred = self.value_head(v_h)\n return (pi_logits, vpred, None), state_out",
+ "type": "code",
+ "location": "/lib/policy.py:240-269"
+ },
+ "269": {
+ "file_id": 14,
+ "content": "This code defines a class that uses a neural network to make policy decisions. It includes methods for creating an action head, initializing the state, resetting parameters, and performing forward passes on input observations. The forward pass involves passing the observation through the network, extracting policy logits and value predictions using separate heads, and returning these outputs along with any updated state.",
+ "type": "comment"
+ },
+ "270": {
+ "file_id": 14,
+ "content": " def get_logprob_of_action(self, pd, action):\n \"\"\"\n Get logprob of taking action `action` given probability distribution\n (see `get_gradient_for_action` to get this distribution)\n \"\"\"\n ac = tree_map(lambda x: x.unsqueeze(1), action)\n log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n return log_prob[:, 0]\n def get_kl_of_action_dists(self, pd1, pd2):\n \"\"\"\n Get the KL divergence between two action probability distributions\n \"\"\"\n return self.pi_head.kl_divergence(pd1, pd2)\n def get_output_for_observation(self, obs, state_in, first):\n \"\"\"\n Return gradient-enabled outputs for given observation.\n Use `get_logprob_of_action` to get log probability of action\n with the given probability distribution.\n Returns:\n - probability distribution given observation\n - value prediction for given observation\n - new state\n \"\"\"\n # We need to add a fictitious time dimension everywhere",
+ "type": "code",
+ "location": "/lib/policy.py:271-299"
+ },
+ "271": {
+ "file_id": 14,
+ "content": "This code defines three functions for handling actions and probabilities in a policy network. The first function `get_logprob_of_action` calculates the log probability of taking a given action based on the provided probability distribution. The second function `get_kl_of_action_dists` computes the KL divergence between two action probability distributions. Lastly, the `get_output_for_observation` function returns the probability distribution, value prediction, and new state for a given observation using the previous two functions.",
+ "type": "comment"
+ },
+ "272": {
+ "file_id": 14,
+ "content": " obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n return pd, self.value_head.denormalize(vpred)[:, 0], state_out\n @th.no_grad()\n def act(self, obs, first, state_in, stochastic: bool = True, taken_action=None, return_pd=False):\n # We need to add a fictitious time dimension everywhere\n obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n if taken_action is None:\n ac = self.pi_head.sample(pd, deterministic=not stochastic)\n else:\n ac = tree_map(lambda x: x.unsqueeze(1), taken_action)\n log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n # After unsqueezing, squeeze back to remove fictitious time dimension\n result = {\"log_prob\": log_prob[:, 0], \"vpred\": self.value_head.denormalize(vpred)[:, 0]}",
+ "type": "code",
+ "location": "/lib/policy.py:300-323"
+ },
+ "273": {
+ "file_id": 14,
+ "content": "Code is adding a time dimension to the observations and first state, then passing them through the model to get policies (pd), value predictions (vpred), and update the state. If a taken action is provided, it uses that for the current step instead of sampling from the policy. It calculates the log probability of the taken action and stores the results in a dictionary with keys \"log_prob\" and \"vpred\". The time dimension is removed after calculations are done.",
+ "type": "comment"
+ },
+ "274": {
+ "file_id": 14,
+ "content": " if return_pd:\n result[\"pd\"] = tree_map(lambda x: x[:, 0], pd)\n ac = tree_map(lambda x: x[:, 0], ac)\n return ac, state_out, result\n @th.no_grad()\n def v(self, obs, first, state_in):\n \"\"\"Predict value for a given mdp observation\"\"\"\n obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n # After unsqueezing, squeeze back\n return self.value_head.denormalize(vpred)[:, 0]\nclass InverseActionNet(MinecraftPolicy):\n \"\"\"\n Args:\n conv3d_params: PRE impala 3D CNN params. They are just passed into th.nn.Conv3D.\n \"\"\"\n def __init__(\n self,\n hidsize=512,\n conv3d_params=None,\n **MCPoliy_kwargs,\n ):\n super().__init__(\n hidsize=hidsize,\n # If we're using 3dconv, then we normalize entire impala otherwise don't\n # normalize the first impala layer since we normalize the input",
+ "type": "code",
+ "location": "/lib/policy.py:324-357"
+ },
+ "275": {
+ "file_id": 14,
+ "content": "This code defines a class called \"InverseActionNet\" that inherits from another class named \"MinecraftPolicy\". The class has an initializer which takes parameters for hidden size, 3D convolution parameters, and any other arguments passed to the parent class. It also contains two methods: \"policy\" and \"v\".\n\nThe \"policy\" method calculates the policy distribution (pd) and the value prediction (vpred) for a given observation (obs). It returns the pd, vpred, and an additional state_out. If return_pd is True, it also returns the first element of each vector in the pd array by using tree_map lambda function.\n\nThe \"v\" method predicts the value for a given MDP observation. It takes obs, first, and state_in as input parameters. After unsqueezing the obs and first variables, it calls the parent class's __call__ method to get pd, vpred, and state_out. Finally, it returns the denormalized vpred value of the first element in each vector by using self.value_head.denormalize function.",
+ "type": "comment"
+ },
+ "276": {
+ "file_id": 14,
+ "content": " first_conv_norm=conv3d_params is not None,\n **MCPoliy_kwargs,\n )\n self.conv3d_layer = None\n if conv3d_params is not None:\n # 3D conv is the first layer, so don't normalize its input\n conv3d_init_params = deepcopy(self.init_norm_kwargs)\n conv3d_init_params[\"group_norm_groups\"] = None\n conv3d_init_params[\"batch_norm\"] = False\n self.conv3d_layer = FanInInitReLULayer(\n layer_type=\"conv3d\",\n log_scope=\"3d_conv\",\n **conv3d_params,\n **conv3d_init_params,\n )\n def forward(self, ob, state_in, context):\n first = context[\"first\"]\n x = self.img_preprocess(ob[\"img\"])\n # Conv3D Prior to Impala\n if self.conv3d_layer is not None:\n x = self._conv3d_forward(x)\n # Impala Stack\n x = self.img_process(x)\n if self.recurrent_layer is not None:\n x, state_out = self.recurrent_layer(x, first, state_in)",
+ "type": "code",
+ "location": "/lib/policy.py:358-386"
+ },
+ "277": {
+ "file_id": 14,
+ "content": "This code initializes a 3D convolution layer if the `conv3d_params` is not None. It also sets the initialization parameters for the 3D conv layer differently to avoid normalization of its input. The forward function applies the 3D convolution (if available) before processing the image stack.",
+ "type": "comment"
+ },
+ "278": {
+ "file_id": 14,
+ "content": " x = F.relu(x, inplace=False)\n pi_latent = self.lastlayer(x)\n pi_latent = self.final_ln(x)\n return (pi_latent, None), state_out\n def _conv3d_forward(self, x):\n # Convert from (B, T, H, W, C) -> (B, H, W, C, T)\n x = transpose(x, \"bthwc\", \"bcthw\")\n new_x = []\n for mini_batch in th.split(x, 1):\n new_x.append(self.conv3d_layer(mini_batch))\n x = th.cat(new_x)\n # Convert back\n x = transpose(x, \"bcthw\", \"bthwc\")\n return x\nclass InverseActionPolicy(nn.Module):\n def __init__(\n self,\n action_space,\n pi_head_kwargs=None,\n idm_net_kwargs=None,\n ):\n super().__init__()\n self.action_space = action_space\n self.net = InverseActionNet(**idm_net_kwargs)\n pi_out_size = self.net.output_latent_size()\n pi_head_kwargs = {} if pi_head_kwargs is None else pi_head_kwargs\n self.pi_head = self.make_action_head(pi_out_size=pi_out_size, **pi_head_kwargs)\n def make_action_head(self, **kwargs):",
+ "type": "code",
+ "location": "/lib/policy.py:388-424"
+ },
+ "279": {
+ "file_id": 14,
+ "content": "The code defines a class `InverseActionPolicy` that inherits from `nn.Module`. This class represents an inverse action policy for a reinforcement learning model. It consists of two components: a network (`self.net`) and a policy head (`self.pi_head`). The network is responsible for mapping observations to a latent space, while the policy head maps the latent representation to a distribution over actions.\n\nThe `__init__` method initializes the instance of the class by setting the action space, creating an instance of the `InverseActionNet`, and then creating the policy head based on the specified output size from the network and any additional keyword arguments provided.\n\nThe `_conv3d_forward` function is a helper function that performs 3D convolution on input data and returns the result. It transposes the input tensor, applies a series of 1D convolutions along different axes, and then transposes the resulting tensor back to the original format.\n\nThe `_policy_and_value` method calculates the policy and value for an input observation by passing it through the network and policy head, applying a ReLU activation function, and normalizing the output distribution. It also returns the current internal state of the module.",
+ "type": "comment"
+ },
+ "280": {
+ "file_id": 14,
+ "content": " return make_action_head(self.action_space, **kwargs)\n def reset_parameters(self):\n super().reset_parameters()\n self.net.reset_parameters()\n self.pi_head.reset_parameters()\n def forward(self, obs, first: th.Tensor, state_in, **kwargs):\n if isinstance(obs, dict):\n # We don't want to mutate the obs input.\n obs = obs.copy()\n # If special \"mask\" key is in obs,\n # It's for masking the logits.\n # We take it out (the network doesn't need it)\n mask = obs.pop(\"mask\", None)\n else:\n mask = None\n (pi_h, _), state_out = self.net(obs, state_in=state_in, context={\"first\": first}, **kwargs)\n pi_logits = self.pi_head(pi_h, mask=mask)\n return (pi_logits, None, None), state_out\n @th.no_grad()\n def predict(\n self,\n obs,\n deterministic: bool = True,\n **kwargs,\n ):\n (pd, _, _), state_out = self(obs=obs, **kwargs)\n ac = self.pi_head.sample(pd, deterministic=deterministic)",
+ "type": "code",
+ "location": "/lib/policy.py:425-457"
+ },
+ "281": {
+ "file_id": 14,
+ "content": "This code defines a policy class for training reinforcement learning models. It has methods to reset the model's parameters, forward pass to obtain action-value logits and state, and a deterministic prediction method. The code uses PyTorch for tensor operations.",
+ "type": "comment"
+ },
+ "282": {
+ "file_id": 14,
+ "content": " log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n result = {\"log_prob\": log_prob, \"pd\": pd}\n return ac, state_out, result\n def initial_state(self, batch_size: int):\n return self.net.initial_state(batch_size)",
+ "type": "code",
+ "location": "/lib/policy.py:458-467"
+ },
+ "283": {
+ "file_id": 14,
+ "content": "The code computes the log probability of actions using the pi_head, checks for NaN values, and returns a dictionary containing the log_probability and pd. It also includes functions for initializing the state based on batch size.",
+ "type": "comment"
+ },
+ "284": {
+ "file_id": 15,
+ "content": "/lib/scaled_mse_head.py",
+ "type": "filepath"
+ },
+ "285": {
+ "file_id": 15,
+ "content": "The code defines a ScaledMSEHead class for a linear output layer, scales targets to N(0, 1), and calculates MSE loss between normalized predictions and denormalized target values in a normalized space.",
+ "type": "summary"
+ },
+ "286": {
+ "file_id": 15,
+ "content": "from typing import Dict, Optional\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.nn.init as init\nfrom lib.action_head import fan_in_linear\nfrom lib.normalize_ewma import NormalizeEwma\nclass ScaledMSEHead(nn.Module):\n \"\"\"\n Linear output layer that scales itself so that targets are always normalized to N(0, 1)\n \"\"\"\n def __init__(\n self, input_size: int, output_size: int, norm_type: Optional[str] = \"ewma\", norm_kwargs: Optional[Dict] = None\n ):\n super().__init__()\n self.input_size = input_size\n self.output_size = output_size\n self.norm_type = norm_type\n self.linear = nn.Linear(self.input_size, self.output_size)\n norm_kwargs = {} if norm_kwargs is None else norm_kwargs\n self.normalizer = NormalizeEwma(output_size, **norm_kwargs)\n def reset_parameters(self):\n init.orthogonal_(self.linear.weight)\n fan_in_linear(self.linear)\n self.normalizer.reset_parameters()\n def forward(self, input_data):\n return self.linear(input_data)",
+ "type": "code",
+ "location": "/lib/scaled_mse_head.py:1-35"
+ },
+ "287": {
+ "file_id": 15,
+ "content": "This code defines a ScaledMSEHead class which is a linear output layer. It scales itself so that targets are always normalized to N(0, 1). The input size, output size, normalization type (ewma), and normalization kwargs can be set upon instantiation. The reset_parameters function initializes the weights with orthogonal initialization and resets the normalizer's parameters. Forward function passes input data through a linear layer.",
+ "type": "comment"
+ },
+ "288": {
+ "file_id": 15,
+ "content": " def loss(self, prediction, target):\n \"\"\"\n Calculate the MSE loss between output and a target.\n 'Prediction' has to be normalized while target is denormalized.\n Loss is calculated in a 'normalized' space.\n \"\"\"\n return F.mse_loss(prediction, self.normalizer(target), reduction=\"mean\")\n def denormalize(self, input_data):\n \"\"\"Convert input value from a normalized space into the original one\"\"\"\n return self.normalizer.denormalize(input_data)\n def normalize(self, input_data):\n return self.normalizer(input_data)",
+ "type": "code",
+ "location": "/lib/scaled_mse_head.py:37-50"
+ },
+ "289": {
+ "file_id": 15,
+ "content": "This code defines a loss function for MSE (Mean Squared Error) and normalization/denormalization functions. It calculates the MSE loss between normalized prediction and denormalized target values in a normalized space, and converts input values from normalized to original space.",
+ "type": "comment"
+ },
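A hypothetical training-style use of ScaledMSEHead with (batch, time, features) inputs, matching the normalizer's default norm_axes=2; the sizes and targets are illustrative.

```python
# Predictions live in normalized space; loss() normalizes the raw targets internally.
import torch
from lib.scaled_mse_head import ScaledMSEHead

head = ScaledMSEHead(input_size=32, output_size=1)
head.train()
features = torch.randn(16, 4, 32)                  # (batch, time, features)
raw_returns = torch.randn(16, 4, 1) * 50.0 + 10.0  # unnormalized value targets
pred = head(features)                              # normalized-space prediction
loss = head.loss(pred, raw_returns)
loss.backward()
print(float(loss))
```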
+ "290": {
+ "file_id": 16,
+ "content": "/lib/torch_util.py",
+ "type": "filepath"
+ },
+ "291": {
+ "file_id": 16,
+ "content": "The code handles libraries, device defaults, tensor and layer functions, CUDA availability, LayerNorm creation, dimension flattening, sequential application of layers, parameter loading from multiple paths, and function state saving. The function takes a dtype string and converts it to PyTorch tensor data type, with an index function for batched broadcasting 'xi' along specified 'gather_dim'.",
+ "type": "summary"
+ },
+ "292": {
+ "file_id": 16,
+ "content": "import functools\nimport itertools\nimport math\nimport os\nimport pickle\nimport re\nimport subprocess\nimport tempfile\nfrom contextlib import contextmanager\nfrom hashlib import md5, sha1\nimport numpy as np\nimport torch as th\nimport torch.distributed as dist\nimport torch.distributions as dis\nimport torch.nn.functional as F\nfrom torch import nn\nimport lib.tree_util as tree_util\nfrom lib import misc\ndef contextmanager_to_decorator(cm):\n def decorator(fn):\n @functools.wraps(fn)\n def newfn(*args, **kwargs):\n with cm():\n return fn(*args, **kwargs)\n return newfn\n return decorator\ndef have_cuda():\n return th.has_cuda\ndef default_device_type():\n return \"cuda\" if have_cuda() else \"cpu\"\nno_grad = contextmanager_to_decorator(th.no_grad)\nDEFAULT_DEVICE = th.device(type=default_device_type())\ndef set_default_torch_device(device):\n global DEFAULT_DEVICE\n DEFAULT_DEVICE = th.device(device)\ndef dev():\n return DEFAULT_DEVICE\ndef zeros(*args, **kwargs):\n return th.zeros(*args, **kwargs, device=dev())",
+ "type": "code",
+ "location": "/lib/torch_util.py:1-57"
+ },
+ "293": {
+ "file_id": 16,
+ "content": "This code imports various libraries, defines a function to convert context managers into decorators, checks if CUDA is available, sets the default device as either CUDA or CPU depending on availability, and then defines functions for creating tensors with zeros.",
+ "type": "comment"
+ },
+ "294": {
+ "file_id": 16,
+ "content": "def ones(*args, **kwargs):\n return th.ones(*args, **kwargs, device=dev())\ndef arange(*args, **kwargs):\n return th.arange(*args, **kwargs, device=dev())\ndef NormedLinear(*args, scale=1.0, dtype=th.float32, **kwargs):\n \"\"\"\n nn.Linear but with normalized fan-in init\n \"\"\"\n dtype = parse_dtype(dtype)\n if dtype == th.float32:\n out = nn.Linear(*args, **kwargs)\n elif dtype == th.float16:\n out = LinearF16(*args, **kwargs)\n else:\n raise ValueError(dtype)\n out.weight.data *= scale / out.weight.norm(dim=1, p=2, keepdim=True)\n if kwargs.get(\"bias\", True):\n out.bias.data *= 0\n return out\nclass LinearF16(nn.Linear):\n def forward(self, x):\n return F.linear(x, self.weight.half(), self.bias.half() if self.bias is not None else None)\nclass LayerNormF16(nn.LayerNorm):\n def forward(self, x):\n return F.layer_norm(x, self.normalized_shape, self.weight.half(), self.bias.half(), self.eps)\ndef LayerNorm(*args, dtype=th.float32, **kwargs):\n dtype = parse_dtype(dtype)",
+ "type": "code",
+ "location": "/lib/torch_util.py:60-96"
+ },
+ "295": {
+ "file_id": 16,
+ "content": "Code defines functions for creating normalized Linear layers, F16 linear and LayerNorm modules. It also includes a utility function to create Tensor objects on the device specified by dev() function.",
+ "type": "comment"
+ },
+ "296": {
+ "file_id": 16,
+ "content": " if dtype == th.float32:\n out = nn.LayerNorm(*args, **kwargs)\n elif dtype == th.float16:\n out = LayerNormF16(*args, **kwargs)\n else:\n raise ValueError(dtype)\n out.weight.no_scale = True\n return out\ndef flatten_image(x):\n \"\"\"\n Flattens last three dims\n \"\"\"\n *batch_shape, h, w, c = x.shape\n return x.reshape((*batch_shape, h * w * c))\ndef sequential(layers, x, *args, diag_name=None, use_checkpoint=False):\n for (i, layer) in enumerate(layers):\n x = layer(x, *args)\n return x\n@no_grad\ndef load_average_with_metadata(paths, overrides):\n n_models = len(paths)\n model, metadata = load_with_metadata(paths[0], overrides=overrides)\n for p in model.parameters():\n p.mul_(1 / n_models)\n for p in paths[1:]:\n new_model, _ = load_with_metadata(p, overrides=overrides)\n for (n1, p1), (n2, p2) in misc.safezip(model.named_parameters(), new_model.named_parameters()):\n assert n1 == n2, f\"names {n1} and {n2} don't match\"\n p1.add_(p2.mul_(1 / n_models))",
+ "type": "code",
+ "location": "/lib/torch_util.py:97-131"
+ },
+ "297": {
+ "file_id": 16,
+ "content": "- Code snippets from \"Video-Pre-Training/lib/torch_util.py\":\n- 96-130: LayerNorm creation depending on dtype (float32, float16), sets weight no_scale to True.\n- flatten_image: Flattens the last three dimensions of a tensor.\n- sequential: Applies layers in order to input tensor, returns final result.\n- load_average_with_metadata: Loads models from multiple paths and averages their parameters.",
+ "type": "comment"
+ },
+ "298": {
+ "file_id": 16,
+ "content": " return model, metadata\ndef save_kwargs(fn):\n \"\"\"\n This decorator passes through the user-provided kwargs and adds one more, called\n save_kwargs, mapping to {\"create_fn\" : name_of_decorated_fn, \"kwargs\" : other_kwargs}\n You put on this decorator on a function that creates a pytorch module. This will\n save the kwargs and the function that was used to create the module.\n This lets us restore the model state later.\n \"\"\"\n @functools.wraps(fn)\n def wrapper(**kwargs):\n if \"save_kwargs\" in kwargs:\n return fn(**kwargs)\n else:\n sk = {**kwargs, \"create_fn\": f\"{fn.__module__}:{fn.__name__}\"}\n return fn(save_kwargs=sk, **kwargs)\n return wrapper\ndef parse_dtype(x):\n if isinstance(x, th.dtype):\n return x\n elif isinstance(x, str):\n if x == \"float32\" or x == \"float\":\n return th.float32\n elif x == \"float64\" or x == \"double\":\n return th.float64\n elif x == \"float16\" or x == \"half\":\n return th.float16",
+ "type": "code",
+ "location": "/lib/torch_util.py:132-165"
+ },
+ "299": {
+ "file_id": 16,
+ "content": "The code defines a decorator, save_kwargs, that allows saving the function and its arguments used to create a PyTorch module, enabling later restoration of the model state. It also includes a utility function, parse_dtype, for converting data types into their equivalent PyTorch dtype objects.",
+ "type": "comment"
+ }
+}
\ No newline at end of file
diff --git a/docs/data/3.json b/docs/data/3.json
new file mode 100644
index 0000000..3139d10
--- /dev/null
+++ b/docs/data/3.json
@@ -0,0 +1,546 @@
+{
+ "300": {
+ "file_id": 16,
+ "content": " elif x == \"uint8\":\n return th.uint8\n elif x == \"int8\":\n return th.int8\n elif x == \"int16\" or x == \"short\":\n return th.int16\n elif x == \"int32\" or x == \"int\":\n return th.int32\n elif x == \"int64\" or x == \"long\":\n return th.int64\n elif x == \"bool\":\n return th.bool\n else:\n raise ValueError(f\"cannot parse {x} as a dtype\")\n else:\n raise TypeError(f\"cannot parse {type(x)} as dtype\")\ndef index(x, i):\n \"\"\"\n Batched, broadcasting index of x along dimension i.ndim.\n For example, if x has shape (1, 2, 3, 4, 5) and i has shape (1, 1, 3)\n then the result has shape (1, 2, 3, 5) and each value in i must be between 0 and 3.\n \"\"\"\n assert x.ndim >= i.ndim + 1\n gather_dim = i.ndim\n while i.ndim < x.ndim:\n i = i.unsqueeze(-1)\n expand_shape = list(x.shape)\n expand_shape[gather_dim] = 1\n i = i.expand(*expand_shape)\n xi = th.gather(x, gather_dim, i)\n assert xi.shape[gather_dim] == 1",
+ "type": "code",
+ "location": "/lib/torch_util.py:166-199"
+ },
+ "301": {
+ "file_id": 16,
+ "content": "This function parses a dtype string and returns the corresponding PyTorch tensor data type. It also provides an index function for batched, broadcasting index of x along dimension i.ndim. The index function ensures that the input shape is compatible with the tensor shape and expands or gathers the tensor accordingly.",
+ "type": "comment"
+ },
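A short illustration of parse_dtype and the batched index helper described above; the tensors are arbitrary examples.

```python
# parse_dtype maps strings to torch dtypes; index gathers one entry per leading position.
import torch as th
from lib.torch_util import index, parse_dtype

print(parse_dtype("half"))              # torch.float16

x = th.arange(24).reshape(2, 3, 4)      # shape (2, 3, 4)
i = th.tensor([[0, 2, 1], [3, 0, 0]])   # shape (2, 3); values index the last dim
print(tuple(index(x, i).shape))         # (2, 3)
```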
+ "302": {
+ "file_id": 16,
+ "content": " return xi.squeeze(gather_dim)",
+ "type": "code",
+ "location": "/lib/torch_util.py:200-200"
+ },
+ "303": {
+ "file_id": 16,
+ "content": "This function is squeezing the dimensions of the tensor 'xi' based on the value in 'gather_dim'.",
+ "type": "comment"
+ },
+ "304": {
+ "file_id": 17,
+ "content": "/lib/tree_util.py",
+ "type": "filepath"
+ },
+ "305": {
+ "file_id": 17,
+ "content": "The code includes utility functions for manipulating data structures, defines partial applications and safe mapping functions for multiple lists, and registers different data types and their conversion functions for serialization using the PyTree API.",
+ "type": "summary"
+ },
+ "306": {
+ "file_id": 17,
+ "content": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# Copied this from jax, made it self-contained\n# Currently just used for improved_checkpoint\nimport collections\nimport functools\nimport itertools as it\nfrom collections.abc import Collection\nfrom typing import Dict, List, Optional\ndef unzip2(xys):\n xs = []\n ys = []\n for x, y in xys:\n xs.append(x)\n ys.append(y)\n return tuple(xs), tuple(ys)\ndef partial(fun, *args, **kwargs):\n wrapped = functools.partial(fun, *args, **kwargs)",
+ "type": "code",
+ "location": "/lib/tree_util.py:1-35"
+ },
+ "307": {
+ "file_id": 17,
+ "content": "This code block contains utility functions for manipulating dictionaries, tuples and lists. It defines two functions: unzip2 and partial. Unzip2 takes a list of pairs (x, y) and returns the corresponding x and y as separate tuples. Partial is a wrapper function that creates a partial application of another function with specified arguments or keyword arguments.",
+ "type": "comment"
+ },
+ "308": {
+ "file_id": 17,
+ "content": " functools.update_wrapper(wrapped, fun)\n wrapped._bound_args = args # pylint: disable=protected-access\n return wrapped\ndef safe_zip(*args: Collection) -> List[tuple]:\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, \"length mismatch: {}\".format(list(map(len, args)))\n return list(zip(*args))\ndef safe_map(f, *args):\n args = list(map(list, args))\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, \"length mismatch: {}\".format(list(map(len, args)))\n return list(map(f, *args))\ndef tree_map(f, tree, treat_as_leaves: Optional[List] = None):\n \"\"\"Map a function over a pytree to produce a new pytree.\n Args:\n f: function to be applied at each leaf.\n tree: a pytree to be mapped over.\n Returns:\n A new pytree with the same structure as `tree` but with the value at each\n leaf given by `f(x)` where `x` is the value at the corresponding leaf in\n `tree`.\n \"\"\"\n if treat_as_leaves is None:\n treat_as_leaves = []\n node_type = node_types.get(type(tree))",
+ "type": "code",
+ "location": "/lib/tree_util.py:36-70"
+ },
+ "309": {
+ "file_id": 17,
+ "content": "This code contains three functions:\n1. `safe_zip()` - creates a list of tuples from multiple lists, asserting that all lists have the same length.\n2. `safe_map()` - applies a function to each element in a list (or multiple lists), asserting that all lists have the same length.\n3. `tree_map()` - maps a function over a pytree and returns a new pytree with the same structure, but with values at leaves given by applying the function to corresponding leaf values in the original tree.",
+ "type": "comment"
+ },
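As an aside, here is a minimal sketch of what `tree_map` does for the built-in container types; the real implementation dispatches through the registered `node_types` table shown later in this file, and `mini_tree_map` is a hypothetical stand-in used only for illustration:

```python
def mini_tree_map(f, tree):
    # dicts, lists, and tuples are treated as internal nodes; everything else is a leaf
    if isinstance(tree, dict):
        return {k: mini_tree_map(f, v) for k, v in tree.items()}
    if isinstance(tree, (list, tuple)):
        return type(tree)(mini_tree_map(f, v) for v in tree)
    return f(tree)

nested = {"obs": [1, 2, 3], "reward": (4.0, 5.0)}
assert mini_tree_map(lambda x: x * 2, nested) == {"obs": [2, 4, 6], "reward": (8.0, 10.0)}
```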
+ "310": {
+ "file_id": 17,
+ "content": " if node_type and type(tree) not in treat_as_leaves:\n children, node_spec = node_type.to_iterable(tree)\n new_children = [tree_map(f, child, treat_as_leaves) for child in children]\n return node_type.from_iterable(node_spec, new_children)\n else:\n return f(tree)\ndef tree_multimap(f, tree, *rest, treat_as_leaves: Optional[List] = None):\n \"\"\"Map a multi-input function over pytree args to produce a new pytree.\n Args:\n f: function that takes `1 + len(rest)` arguments, to be applied at the\n corresponding leaves of the pytrees.\n tree: a pytree to be mapped over, with each leaf providing the first\n positional argument to `f`.\n *rest: a tuple of pytrees, each with the same structure as `tree`.\n Returns:\n A new pytree with the same structure as `tree` but with the value at each\n leaf given by `f(x, *xs)` where `x` is the value at the corresponding leaf\n in `tree` and `xs` is the tuple of values at corresponding leaves in `rest`.\n \"\"\"",
+ "type": "code",
+ "location": "/lib/tree_util.py:71-93"
+ },
+ "311": {
+ "file_id": 17,
+ "content": "This function applies a multi-input function to the leaves of a pytree and its sibling pytrees, returning a new pytree with values determined by `f(x, *xs)` where `x` is the value at the corresponding leaf in the original tree and `xs` is the tuple of values at corresponding leaves in the sibling trees. If the node type allows for further processing, it iterates over the children and applies the function to each child and its corresponding siblings before returning a new pytree.",
+ "type": "comment"
+ },
+ "312": {
+ "file_id": 17,
+ "content": " if treat_as_leaves is None:\n treat_as_leaves = []\n node_type = node_types.get(type(tree))\n if node_type and type(tree) not in treat_as_leaves:\n children, node_spec = node_type.to_iterable(tree)\n all_children = [children]\n for other_tree in rest:\n other_children, other_node_data = node_type.to_iterable(other_tree)\n if other_node_data != node_spec:\n raise TypeError(\"Mismatch: {} != {}\".format(other_node_data, node_spec))\n all_children.append(other_children)\n new_children = [tree_multimap(f, *xs, treat_as_leaves=treat_as_leaves) for xs in zip(*all_children)]\n return node_type.from_iterable(node_spec, new_children)\n else:\n return f(tree, *rest)\ndef prefix_multimap(f, treedef, tree, *rest):\n \"\"\"Like tree_multimap but only maps down through a tree prefix.\"\"\"\n if isinstance(treedef, PyLeaf):\n return f(tree, *rest)\n else:\n node_type = node_types.get(type(tree))\n if node_type != treedef.node_type:",
+ "type": "code",
+ "location": "/lib/tree_util.py:95-119"
+ },
+ "313": {
+ "file_id": 17,
+ "content": "This code is determining the appropriate node type for a given tree and iterating through trees to ensure they match. It then applies a function f to each tree, handling different cases based on whether the node type is specified or not.",
+ "type": "comment"
+ },
+ "314": {
+ "file_id": 17,
+ "content": " raise TypeError(\"Mismatch: {} != {}\".format(treedef.node_type, node_type))\n children, node_data = node_type.to_iterable(tree)\n if node_data != treedef.node_data:\n raise TypeError(\"Mismatch: {} != {}\".format(treedef.node_data, node_data))\n all_children = [children]\n for other_tree in rest:\n other_children, other_node_data = node_type.to_iterable(other_tree)\n if other_node_data != node_data:\n raise TypeError(\"Mismatch: {} != {}\".format(other_node_data, node_data))\n all_children.append(other_children)\n all_children = zip(*all_children)\n new_children = [prefix_multimap(f, td, *xs) for td, xs in zip(treedef.children, all_children)]\n return node_type.from_iterable(node_data, new_children)\ndef walk_pytree(f_node, f_leaf, tree, treat_as_leaves: Optional[List] = None):\n node_type = node_types.get(type(tree))\n if treat_as_leaves is None:\n treat_as_leaves = []\n if node_type and type(tree) not in treat_as_leaves:",
+ "type": "code",
+ "location": "/lib/tree_util.py:120-141"
+ },
+ "315": {
+ "file_id": 17,
+ "content": "Code is iterating over a tree structure and checking if the nodes match in terms of node type, node data, and number of children. If any mismatches are found, a TypeError is raised. The function walks through the tree recursively and applies functions to both leaf and non-leaf nodes based on their types. Optional treat_as_leaves list specifies which types should be treated as leaves.",
+ "type": "comment"
+ },
+ "316": {
+ "file_id": 17,
+ "content": " children, node_spec = node_type.to_iterable(tree)\n proc_children, child_specs = unzip2([walk_pytree(f_node, f_leaf, child, treat_as_leaves) for child in children])\n tree_def = PyTreeDef(node_type, node_spec, child_specs)\n return f_node(proc_children), tree_def\n else:\n return f_leaf(tree), PyLeaf()\ndef build_tree(treedef, xs):\n if isinstance(treedef, PyLeaf):\n return xs\n else:\n # We use 'iter' for clearer error messages\n children = safe_map(build_tree, iter(treedef.children), iter(xs))\n return treedef.node_type.from_iterable(treedef.node_data, children)\ndef _tree_unflatten(xs, treedef):\n if isinstance(treedef, PyLeaf):\n return next(xs)\n else:\n children = safe_map(partial(_tree_unflatten, xs), treedef.children)\n return treedef.node_type.from_iterable(treedef.node_data, children)\ndef _num_leaves(treedef):\n return 1 if isinstance(treedef, PyLeaf) else sum(safe_map(_num_leaves, treedef.children))\ndef _nested_treedef(inner, outer):",
+ "type": "code",
+ "location": "/lib/tree_util.py:142-171"
+ },
+ "317": {
+ "file_id": 17,
+ "content": "Function `to_iterable` splits node type and specifications, walks tree using function `walk_pytree`, unzips the result into procedure children and child specifications, creates a `PyTreeDef` object, and returns the processed children and tree definition.\n`build_tree` recursively builds the tree by calling itself on each child of the current node and constructs the final node using the node type and data.\n`_tree_unflatten` recursively unflattens the tree by calling itself on each child of the current node.\nFunction `_num_leaves` returns 1 if the treedef is a leaf, otherwise it sums the number of leaves in each child.\nFunction `_nested_treedef` takes two node types and returns a nested tree definition.",
+ "type": "comment"
+ },
+ "318": {
+ "file_id": 17,
+ "content": " # just used in tree_transpose error checking\n if isinstance(outer, PyLeaf):\n return inner\n else:\n children = safe_map(partial(_nested_treedef, inner), outer.children)\n return PyTreeDef(outer.node_type, outer.node_data, tuple(children))\nclass PyTreeDef(object):\n def __init__(self, node_type, node_data, children):\n self.node_type = node_type\n self.node_data = node_data\n self.children = children\n def __repr__(self):\n if self.node_data is None:\n data_repr = \"\"\n else:\n data_repr = \"[{}]\".format(self.node_data)\n return \"PyTree({}{}, [{}])\".format(self.node_type.name, data_repr, \",\".join(safe_map(repr, self.children)))\n def __hash__(self):\n return hash((self.node_type, self.node_data, tuple(self.children)))\n def __eq__(self, other):\n if isinstance(other, PyLeaf):\n return False\n else:\n return self.node_type == other.node_type and self.node_data == other.node_data and self.children == other.children",
+ "type": "code",
+ "location": "/lib/tree_util.py:172-201"
+ },
+ "319": {
+ "file_id": 17,
+ "content": "The code defines a `PyTreeDef` class representing nodes in a tree structure. It checks if the input is a leaf node, then creates children objects using `_nested_treedef` and `safe_map`, and returns an instance of `PyTreeDef` with the given node type, data, and children. The class also provides a custom `__repr__` method for string representation, a `__hash__` method for hashability, and an `__eq__` method for equality comparison.",
+ "type": "comment"
+ },
+ "320": {
+ "file_id": 17,
+ "content": " def __ne__(self, other):\n return not self == other\nclass PyLeaf(object):\n def __repr__(self):\n return \"*\"\n def __eq__(self, other):\n return isinstance(other, PyLeaf)\nclass NodeType(object):\n def __init__(self, name, to_iterable, from_iterable):\n self.name = name\n self.to_iterable = to_iterable\n self.from_iterable = from_iterable\nnode_types: Dict[type, NodeType] = {}\ndef register_pytree_node(py_type, to_iterable, from_iterable):\n assert py_type not in node_types\n node_types[py_type] = NodeType(str(py_type), to_iterable, from_iterable)\ndef tuple_to_iterable(xs):\n return xs, None\ndef tuple_from_iterable(_keys, xs):\n return tuple(xs)\ndef list_to_iterable(xs):\n return tuple(xs), None\ndef list_from_iterable(_keys, xs):\n return list(xs)\ndef dict_to_iterable(xs):\n keys = tuple(sorted(xs.keys()))\n return tuple(map(xs.get, keys)), keys\ndef dict_from_iterable(keys, xs):\n return dict(safe_zip(keys, xs))\ndef ordered_dict_from_iterable(keys, xs):\n return collections.OrderedDict(safe_zip(keys, xs))",
+ "type": "code",
+ "location": "/lib/tree_util.py:203-256"
+ },
+ "321": {
+ "file_id": 17,
+ "content": "This code defines a class hierarchy for representing and serializing objects in a tree-like structure. It includes methods for converting different types of collections (tuples, lists, dictionaries) to iterable and back. The `NodeType` class stores information about the type and its conversion rules, which are registered using the `register_pytree_node` function.",
+ "type": "comment"
+ },
+ "322": {
+ "file_id": 17,
+ "content": "def default_dict_to_iterable(xs):\n return (tuple(xs.values()), (xs.default_factory, tuple(xs.keys())))\ndef default_dict_from_iterable(keys, xs):\n return collections.defaultdict(keys[0], safe_zip(keys[1], xs))\ndef none_to_iterable(_xs):\n return (), None\ndef none_from_iterable(_keys, _xs):\n return None\nregister_pytree_node(tuple, tuple_to_iterable, tuple_from_iterable)\nregister_pytree_node(list, list_to_iterable, list_from_iterable)\nregister_pytree_node(dict, dict_to_iterable, dict_from_iterable)\nregister_pytree_node(collections.OrderedDict, dict_to_iterable, ordered_dict_from_iterable)\nregister_pytree_node(collections.defaultdict, default_dict_to_iterable, default_dict_from_iterable)\nregister_pytree_node(type(None), none_to_iterable, none_from_iterable)",
+ "type": "code",
+ "location": "/lib/tree_util.py:259-280"
+ },
+ "323": {
+ "file_id": 17,
+ "content": "This code registers different data types and their conversion functions for serialization using the PyTree API. It handles tuples, lists, dictionaries (including OrderedDict), collections.defaultdict, and None type. The default_dict_to_iterable, default_dict_from_iterable, none_to_iterable, and none_from_iterable functions handle the conversion to and from iterables for these data types.",
+ "type": "comment"
+ },
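A hypothetical example of registering an additional container type with this machinery, following the same `to_iterable`/`from_iterable` pattern used above for `dict` and `tuple` (assumes the repo root is on `PYTHONPATH`; `Point` and its converters are illustrative only):

```python
import collections
from lib.tree_util import register_pytree_node, tree_map

Point = collections.namedtuple("Point", ["x", "y"])

def point_to_iterable(p):
    return (p.x, p.y), None            # (children, node_data)

def point_from_iterable(_node_data, children):
    return Point(*children)

register_pytree_node(Point, point_to_iterable, point_from_iterable)

# tree_map now recurses into Point fields like any other registered node type.
assert tree_map(lambda v: v + 1, {"p": Point(1, 2)}) == {"p": Point(2, 3)}
```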
+ "324": {
+ "file_id": 18,
+ "content": "/lib/util.py",
+ "type": "filepath"
+ },
+ "325": {
+ "file_id": 18,
+ "content": "The code defines neural network functions for data processing, including ResidualRecurrentBlocks and BatchNorm2d initialization, as well as MLPs, LSTM/RNN layers, Transformer blocks with recurrent forward pass. It also includes a function `get_norm` for normalization and another function `_banded_repeat`.",
+ "type": "summary"
+ },
+ "326": {
+ "file_id": 18,
+ "content": "from typing import Dict, Optional\nimport torch as th\nfrom torch import nn\nfrom torch.nn import functional as F\nimport lib.torch_util as tu\nfrom lib.masked_attention import MaskedAttention\nfrom lib.minecraft_util import store_args\nfrom lib.tree_util import tree_map\ndef get_module_log_keys_recursive(m: nn.Module):\n \"\"\"Recursively get all keys that a module and its children want to log.\"\"\"\n keys = []\n if hasattr(m, \"get_log_keys\"):\n keys += m.get_log_keys()\n for c in m.children():\n keys += get_module_log_keys_recursive(c)\n return keys\nclass FanInInitReLULayer(nn.Module):\n \"\"\"Implements a slightly modified init that correctly produces std 1 outputs given ReLU activation\n :param inchan: number of input channels\n :param outchan: number of output channels\n :param layer_args: positional layer args\n :param layer_type: options are \"linear\" (dense layer), \"conv\" (2D Convolution), \"conv3d\" (3D convolution)\n :param init_scale: multiplier on initial weights\n :param batch_norm: use batch norm after the layer (for 2D data)",
+ "type": "code",
+ "location": "/lib/util.py:1-30"
+ },
+ "327": {
+ "file_id": 18,
+ "content": "This code defines a function `get_module_log_keys_recursive` that recursively collects all keys that a module and its children want to log. It also defines a class `FanInInitReLULayer` which implements a slightly modified initialization for ReLU layers, initializing the weights with standard deviation of 1. The class takes parameters such as number of input and output channels, layer type (linear, conv or conv3d), initialization scale, and whether to use batch normalization.",
+ "type": "comment"
+ },
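A small sketch of the fan-in rescaling described above, applied to a plain `nn.Linear`; the layer sizes and `init_scale` are arbitrary, and the scaling line mirrors the one shown further down in the class:

```python
import torch as th
from torch import nn

init_scale = 1.4
layer = nn.Linear(256, 128, bias=False)
layer.weight.data *= init_scale / layer.weight.norm(
    dim=tuple(range(1, layer.weight.data.ndim)), p=2, keepdim=True
)

# Every output unit's incoming weight vector now has L2 norm equal to init_scale.
assert th.allclose(layer.weight.norm(dim=1), th.full((128,), init_scale), atol=1e-5)
```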
+ "328": {
+ "file_id": 18,
+ "content": " :param group_norm_groups: if not None, use group norm with this many groups after the layer. Group norm 1\n would be equivalent of layernorm for 2D data.\n :param layer_norm: use layernorm after the layer (for 1D data)\n :param layer_kwargs: keyword arguments for the layer\n \"\"\"\n @store_args\n def __init__(\n self,\n inchan: int,\n outchan: int,\n *layer_args,\n layer_type: str = \"conv\",\n init_scale: int = 1,\n batch_norm: bool = False,\n batch_norm_kwargs: Dict = {},\n group_norm_groups: Optional[int] = None,\n layer_norm: bool = False,\n use_activation=True,\n log_scope: Optional[str] = None,\n **layer_kwargs,\n ):\n super().__init__()\n # Normalization\n self.norm = None\n if batch_norm:\n self.norm = nn.BatchNorm2d(inchan, **batch_norm_kwargs)\n elif group_norm_groups is not None:\n self.norm = nn.GroupNorm(group_norm_groups, inchan)\n elif layer_norm:\n self.norm = nn.LayerNorm(inchan)",
+ "type": "code",
+ "location": "/lib/util.py:31-62"
+ },
+ "329": {
+ "file_id": 18,
+ "content": "This code defines a function `__init__` which initializes an object. It takes various parameters like `inchan`, `outchan`, `layer_args`, `layer_type`, `init_scale`, `batch_norm`, `batch_norm_kwargs`, `group_norm_groups`, `layer_norm`, `use_activation`, and `log_scope`. It also takes keyword arguments like `**layer_kwargs`. The function sets the normalization type based on the values of these parameters. If `batch_norm` is True, it uses BatchNorm2d. If `group_norm_groups` is not None, it uses GroupNorm. And if `layer_norm` is True, it uses LayerNorm. It also sets the norm variable to None initially.",
+ "type": "comment"
+ },
+ "330": {
+ "file_id": 18,
+ "content": " layer = dict(conv=nn.Conv2d, conv3d=nn.Conv3d, linear=nn.Linear)[layer_type]\n self.layer = layer(inchan, outchan, bias=self.norm is None, *layer_args, **layer_kwargs)\n # Init Weights (Fan-In)\n self.layer.weight.data *= init_scale / self.layer.weight.norm(\n dim=tuple(range(1, self.layer.weight.data.ndim)), p=2, keepdim=True\n )\n # Init Bias\n if self.layer.bias is not None:\n self.layer.bias.data *= 0\n def forward(self, x):\n \"\"\"Norm after the activation. Experimented with this for both IAM and BC and it was slightly better.\"\"\"\n if self.norm is not None:\n x = self.norm(x)\n x = self.layer(x)\n if self.use_activation:\n x = F.relu(x, inplace=True)\n return x\n def get_log_keys(self):\n return [\n f\"activation_mean/{self.log_scope}\",\n f\"activation_std/{self.log_scope}\",\n ]\nclass ResidualRecurrentBlocks(nn.Module):\n @store_args\n def __init__(\n self,",
+ "type": "code",
+ "location": "/lib/util.py:64-94"
+ },
+ "331": {
+ "file_id": 18,
+ "content": "This code defines a util module with functions for initializing and forwarding data through neural networks. The ResidualRecurrentBlocks class is used to create residual recurrent blocks, which help in improving the stability of the network during training.",
+ "type": "comment"
+ },
+ "332": {
+ "file_id": 18,
+ "content": " n_block=2,\n recurrence_type=\"multi_layer_lstm\",\n is_residual=True,\n **block_kwargs,\n ):\n super().__init__()\n init_scale = n_block ** -0.5 if is_residual else 1\n self.blocks = nn.ModuleList(\n [\n ResidualRecurrentBlock(\n **block_kwargs,\n recurrence_type=recurrence_type,\n is_residual=is_residual,\n init_scale=init_scale,\n block_number=i,\n )\n for i in range(n_block)\n ]\n )\n def forward(self, x, first, state):\n state_out = []\n assert len(state) == len(\n self.blocks\n ), f\"Length of state {len(state)} did not match length of blocks {len(self.blocks)}\"\n for block, _s_in in zip(self.blocks, state):\n x, _s_o = block(x, first, _s_in)\n state_out.append(_s_o)\n return x, state_out\n def initial_state(self, batchsize):\n if \"lstm\" in self.recurrence_type:",
+ "type": "code",
+ "location": "/lib/util.py:95-126"
+ },
+ "333": {
+ "file_id": 18,
+ "content": "This code defines a class that initializes a list of ResidualRecurrentBlock instances, each with potentially different recurrence_type and block_kwargs. The forward method processes input through each block, while the initial_state method returns an initial state for the LSTM recurrence type based on batch size.",
+ "type": "comment"
+ },
+ "334": {
+ "file_id": 18,
+ "content": " return [None for b in self.blocks]\n else:\n return [b.r.initial_state(batchsize) for b in self.blocks]\nclass ResidualRecurrentBlock(nn.Module):\n @store_args\n def __init__(\n self,\n hidsize,\n timesteps,\n init_scale=1,\n recurrence_type=\"multi_layer_lstm\",\n is_residual=True,\n use_pointwise_layer=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n attention_heads=8,\n attention_memory_size=2048,\n attention_mask_style=\"clipped_causal\",\n log_scope=\"resblock\",\n block_number=0,\n ):\n super().__init__()\n self.log_scope = f\"{log_scope}{block_number}\"\n s = init_scale\n if use_pointwise_layer:\n if is_residual:\n s *= 2 ** -0.5 # second residual\n self.mlp0 = FanInInitReLULayer(\n hidsize,\n hidsize * pointwise_ratio,\n init_scale=1,\n layer_type=\"linear\",\n layer_norm=True,",
+ "type": "code",
+ "location": "/lib/util.py:127-161"
+ },
+ "335": {
+ "file_id": 18,
+ "content": "The code defines a ResidualRecurrentBlock class, which is a type of neural network module. It initializes the block with specified parameters like hidsize, timesteps, init_scale, recurrence_type, and more. If is_residual and use_pointwise_layer are True, the mlp0 layer is added to the block with specific size and initialization settings. The method returns an array of initial states for each block in the self.blocks list.",
+ "type": "comment"
+ },
+ "336": {
+ "file_id": 18,
+ "content": " log_scope=self.log_scope + \"/ptwise_mlp0\",\n )\n self.mlp1 = FanInInitReLULayer(\n hidsize * pointwise_ratio,\n hidsize,\n init_scale=s,\n layer_type=\"linear\",\n use_activation=pointwise_use_activation,\n log_scope=self.log_scope + \"/ptwise_mlp1\",\n )\n self.pre_r_ln = nn.LayerNorm(hidsize)\n if recurrence_type in [\"multi_layer_lstm\", \"multi_layer_bilstm\"]:\n self.r = nn.LSTM(hidsize, hidsize, batch_first=True)\n nn.init.normal_(self.r.weight_hh_l0, std=s * (self.r.weight_hh_l0.shape[0] ** -0.5))\n nn.init.normal_(self.r.weight_ih_l0, std=s * (self.r.weight_ih_l0.shape[0] ** -0.5))\n self.r.bias_hh_l0.data *= 0\n self.r.bias_ih_l0.data *= 0\n elif recurrence_type == \"transformer\":\n self.r = MaskedAttention(\n input_size=hidsize,\n timesteps=timesteps,\n memory_size=attention_memory_size,",
+ "type": "code",
+ "location": "/lib/util.py:162-184"
+ },
+ "337": {
+ "file_id": 18,
+ "content": "Creating a multi-layer perceptron (MLP) for pointwise features and layer normalization for pre-training.\n\nInitializing the LSTM or Transformer recurrent layer if specified, using normal distribution with scale 's'.",
+ "type": "comment"
+ },
+ "338": {
+ "file_id": 18,
+ "content": " heads=attention_heads,\n init_scale=s,\n norm=\"none\",\n log_scope=log_scope + \"/sa\",\n use_muP_factor=True,\n mask=attention_mask_style,\n )\n def forward(self, x, first, state):\n residual = x\n x = self.pre_r_ln(x)\n x, state_out = recurrent_forward(\n self.r,\n x,\n first,\n state,\n reverse_lstm=self.recurrence_type == \"multi_layer_bilstm\" and (self.block_number + 1) % 2 == 0,\n )\n if self.is_residual and \"lstm\" in self.recurrence_type: # Transformer already residual.\n x = x + residual\n if self.use_pointwise_layer:\n # Residual MLP\n residual = x\n x = self.mlp1(self.mlp0(x))\n if self.is_residual:\n x = x + residual\n return x, state_out\ndef recurrent_forward(module, x, first, state, reverse_lstm=False):\n if isinstance(module, nn.LSTM):\n if state is not None:",
+ "type": "code",
+ "location": "/lib/util.py:185-216"
+ },
+ "339": {
+ "file_id": 18,
+ "content": "This function defines a recurrent forward pass for a Transformer block. It applies linear layers, LSTM/RNN, and optionally an MLP layer to input `x`. The result is returned along with the updated state.",
+ "type": "comment"
+ },
+ "340": {
+ "file_id": 18,
+ "content": " # In case recurrent models do not accept a \"first\" argument we zero out the hidden state here\n mask = 1 - first[:, 0, None, None].to(th.float)\n state = tree_map(lambda _s: _s * mask, state)\n state = tree_map(lambda _s: _s.transpose(0, 1), state) # NL, B, H\n if reverse_lstm:\n x = th.flip(x, [1])\n x, state_out = module(x, state)\n if reverse_lstm:\n x = th.flip(x, [1])\n state_out = tree_map(lambda _s: _s.transpose(0, 1), state_out) # B, NL, H\n return x, state_out\n else:\n return module(x, first, state)\ndef _banded_repeat(x, t):\n \"\"\"\n Repeats x with a shift.\n For example (ignoring the batch dimension):\n _banded_repeat([A B C D E], 4)\n =\n [D E 0 0 0]\n [C D E 0 0]\n [B C D E 0]\n [A B C D E]\n \"\"\"\n b, T = x.shape\n x = th.cat([x, x.new_zeros(b, t - 1)], dim=1)\n result = x.unfold(1, T, 1).flip(1)\n return result\ndef bandify(b_nd, t, T):\n \"\"\"\n b_nd -> D_ntT, where\n \"n\" indexes over basis functions",
+ "type": "code",
+ "location": "/lib/util.py:217-253"
+ },
+ "341": {
+ "file_id": 18,
+ "content": "This code is initializing a state for a recurrent model and passing input through the model. If reverse_lstm is True, it flips the input and output. The _banded_repeat function repeats an input sequence with a shift and the bandify function converts data from basis functions to a new shape.",
+ "type": "comment"
+ },
+ "342": {
+ "file_id": 18,
+ "content": " \"d\" indexes over time differences\n \"t\" indexes over output time\n \"T\" indexes over input time\n only t >= T is nonzero\n B_ntT[n, t, T] = b_nd[n, t - T]\n \"\"\"\n nbasis, bandsize = b_nd.shape\n b_nd = b_nd[:, th.arange(bandsize - 1, -1, -1)]\n if bandsize >= T:\n b_nT = b_nd[:, -T:]\n else:\n b_nT = th.cat([b_nd.new_zeros(nbasis, T - bandsize), b_nd], dim=1)\n D_tnT = _banded_repeat(b_nT, t)\n return D_tnT\ndef get_norm(name, d, dtype=th.float32):\n if name == \"none\":\n return lambda x: x\n elif name == \"layer\":\n return tu.LayerNorm(d, dtype=dtype)\n else:\n raise NotImplementedError(name)",
+ "type": "code",
+ "location": "/lib/util.py:254-276"
+ },
+ "343": {
+ "file_id": 18,
+ "content": "This code defines a function `get_norm` for normalization, and another function (not shown) called `_banded_repeat`. The `B_ntT` shape is being assigned based on the `b_nd` shape and a time index `T`. If `bandsize >= T`, it assigns `b_nT` as `b_nd[:, -T:]`. Otherwise, it concatenates `b_nd.new_zeros(nbasis, T - bandsize)` and `b_nd` along dimension 1 to form `b_nT`. The function then returns the result of `_banded_repeat(b_nT, t)`.",
+ "type": "comment"
+ },
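A worked example of `_banded_repeat` matching its docstring (b=1, T=5, t=4); the lines below reproduce the same `cat`/`unfold`/`flip` sequence used in the source:

```python
import torch as th

x = th.tensor([[1, 2, 3, 4, 5]])                     # shape (b=1, T=5)
t = 4
b, T = x.shape
padded = th.cat([x, x.new_zeros(b, t - 1)], dim=1)   # [1 2 3 4 5 0 0 0]
result = padded.unfold(1, T, 1).flip(1)              # shape (b, t, T)

expected = th.tensor([[[4, 5, 0, 0, 0],
                       [3, 4, 5, 0, 0],
                       [2, 3, 4, 5, 0],
                       [1, 2, 3, 4, 5]]])
assert th.equal(result, expected)
```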
+ "344": {
+ "file_id": 19,
+ "content": "/lib/xf.py",
+ "type": "filepath"
+ },
+ "345": {
+ "file_id": 19,
+ "content": "The code defines an attention mechanism with preprocessing methods and StridedAttn class, as well as SelfAttentionLayer and residual MLP layers for transformer models, including operations like concatenation, reshaping, and activation functions.",
+ "type": "summary"
+ },
+ "346": {
+ "file_id": 19,
+ "content": "\"\"\"\nImplementation of transformer and reshaping-based sparse transformer\n\"\"\"\nimport functools\nimport math\nimport torch as th\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib import misc, mlp\nfrom lib import torch_util as tu\nfrom lib import util\nSENTINEL = 0.1337\ndef attention(\n Q_bte,\n K_bTe,\n V_bTe,\n dtype,\n mask=True,\n extra_btT=None,\n maxlen=None,\n check_sentinel=False,\n use_muP_factor=False,\n):\n \"\"\"\n performs softmax(Q*K)*V operation\n t : output (write) time axis, possibly size=1 for just the last timestep\n T : input (read) time axis\n t < T is OK\n 'check_sentinel' is used when you want to make it impossible to attend to certain keys.\n All keys where every value is equal to the constant SENTINEL will be ignored.\n Currently this is only used by StridedAttn.\n \"\"\"\n assert Q_bte.dtype == K_bTe.dtype == dtype, f\"{Q_bte.dtype}, {K_bTe.dtype}, {dtype} must all match\"\n e = Q_bte.shape[2]\n if check_sentinel:\n invalid = (K_bTe == SENTINEL).int().sum(dim=-1) == e",
+ "type": "code",
+ "location": "/lib/xf.py:1-43"
+ },
+ "347": {
+ "file_id": 19,
+ "content": "The code snippet defines a function named \"attention\" which performs softmax(Q*K)*V operation. It takes query (Q), keys (K), and values (V) as input, along with the data type, mask, additional batch-to-time matrix (extra_btT), maximum length (maxlen), and a flag to check sentinel values (check_sentinel). The function checks if the data types match and then proceeds to perform the softmax operation.",
+ "type": "comment"
+ },
+ "348": {
+ "file_id": 19,
+ "content": " invalid = misc.reshape(invalid, \"b, T\", \"b, 1, T\")\n if isinstance(mask, th.Tensor):\n bias = (~mask).float() * -1e9\n elif mask:\n bias = get_attn_bias_cached(Q_bte.shape[1], K_bTe.shape[1], maxlen=maxlen, device=Q_bte.device, dtype=th.float32)\n else:\n bias = Q_bte.new_zeros((), dtype=th.float32)\n if extra_btT is not None:\n bias = bias + extra_btT\n # Equivalent to bias + (1 / math.sqrt(e)) * th.einsum(\"bte,bpe->btp\", Q_bte, K_bte)\n # but faster:\n logit_btT = th.baddbmm(\n bias,\n Q_bte.float(),\n K_bTe.float().transpose(-1, -2),\n alpha=(1 / e) if use_muP_factor else (1 / math.sqrt(e)),\n )\n if check_sentinel:\n logit_btT = logit_btT - 1e9 * invalid.float()\n W_btT = th.softmax(logit_btT, dim=2).to(dtype)\n if callable(V_bTe):\n # This is used by the sharded video model to defer waiting on\n # the broadcast of the values until they're needed\n V_bTe = V_bTe()\n # th.einsum only lets you use lowercase letters, so 'p' for 'past'",
+ "type": "code",
+ "location": "/lib/xf.py:44-68"
+ },
+ "349": {
+ "file_id": 19,
+ "content": "The code calculates the logits for a multi-head attention mechanism, taking into account masking and optional extra inputs. It applies the necessary transformations to the input tensors and performs the dot product between queries (Q) and keys (K). The result is then normalized using softmax function to obtain the weights (W_btT) for the attention process.",
+ "type": "comment"
+ },
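A minimal, self-contained sketch of the computation `attention` performs in the unmasked case (mask=False, no extra bias, no muP factor); shapes follow the docstring: Q is (b, t, e), K and V are (b, T, e):

```python
import math
import torch as th

b, t, T, e = 2, 3, 5, 8
Q, K, V = th.randn(b, t, e), th.randn(b, T, e), th.randn(b, T, e)

# baddbmm(bias, Q, K^T, alpha=1/sqrt(e)) is a fused bias-plus-scaled batched matmul.
logits = th.baddbmm(th.zeros(()), Q, K.transpose(-1, -2), alpha=1 / math.sqrt(e))
W = th.softmax(logits, dim=2)                # (b, t, T) attention weights
A = th.einsum("btp,bpe->bte", W, V)          # (b, t, e) attended values

# Same result written in the reference softmax(QK^T / sqrt(e)) V form.
ref = th.softmax(Q @ K.transpose(-1, -2) / math.sqrt(e), dim=-1) @ V
assert th.allclose(A, ref, atol=1e-6)
```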
+ "350": {
+ "file_id": 19,
+ "content": " # means 'T'\n A_bte = th.einsum(\"btp,bpe->bte\", W_btT, V_bTe)\n return A_bte\nclass Attn:\n \"\"\"\n Defines an attention mechanism\n All the mechanisms here can be defined by two operations:\n 1. preprocessing Q,K,V,R[=relative attention query]\n to move axes from embedding dimension to\n batch dimension, and possibly doing shifts.\n 2. postprocessing the final result to move axes back to embedding\n axis.\n \"\"\"\n def __init__(self, mask, maxlen):\n self.mask = mask\n self.maxlen = maxlen\n def preproc_qkv(self, Q_bte, K_bte, V_bte):\n raise NotImplementedError\n def preproc_r(self, R_btn):\n raise NotImplementedError\ndef split_heads(x_bte, h):\n b, t, e = x_bte.shape\n assert e % h == 0, \"Embsize must be divisible by number of heads\"\n q = e // h\n x_bthq = x_bte.reshape((b, t, h, q))\n x_bhtq = misc.transpose(x_bthq, \"bthq\", \"bhtq\")\n x_Btq = x_bhtq.reshape((b * h, t, q))\n return x_Btq\nclass All2All(Attn):\n def __init__(self, nhead, maxlen, mask=True, head_dim=None):",
+ "type": "code",
+ "location": "/lib/xf.py:69-107"
+ },
+ "351": {
+ "file_id": 19,
+ "content": "This code defines an attention mechanism class and a function to split input into heads. The attention mechanism is initialized with parameters such as number of heads, maximum length, and mask. The \"preproc_qkv\" and \"preproc_r\" methods for preprocessing Q, K, V, and R are not implemented yet. The code also includes the \"split_heads\" function to split input into multiple heads.",
+ "type": "comment"
+ },
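A shape-only sketch of what `split_heads` does, using `permute` in place of `misc.transpose` (which is a named-axis transpose); the sizes here are arbitrary:

```python
import torch as th

b, t, e, h = 2, 7, 16, 4
x_bte = th.randn(b, t, e)

q = e // h
x_bthq = x_bte.reshape(b, t, h, q)
x_bhtq = x_bthq.permute(0, 2, 1, 3)          # "bthq" -> "bhtq"
x_Btq = x_bhtq.reshape(b * h, t, q)          # heads folded into the batch axis

assert x_Btq.shape == (b * h, t, q)
# Head 1 of batch element 0 corresponds to embedding slice [q:2q].
assert th.equal(x_Btq[1], x_bte[0, :, q:2 * q])
```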
+ "352": {
+ "file_id": 19,
+ "content": " super().__init__(mask=mask, maxlen=maxlen)\n assert (nhead is None) != (head_dim is None), \"exactly one of nhead and head_dim must be specified\"\n self.h = nhead\n self.head_dim = head_dim\n def preproc_qkv(self, *xs):\n q = xs[0].shape[-1]\n for x in xs:\n assert x.shape[-1] == q, \"embedding dimensions do not match\"\n h = self.h or misc.exact_div(q, self.head_dim)\n postproc = functools.partial(self.postproc_a, h=h)\n return (postproc, *tuple(split_heads(x, h) for x in xs))\n def preproc_r(self, R_btn):\n _, ret = self.preproc_qkv(R_btn)\n return ret\n def postproc_a(self, A_Btq, h):\n B, t, q = A_Btq.shape\n b = B // h\n A_bhtq = A_Btq.reshape((b, h, t, q))\n A_bthq = misc.transpose(A_bhtq, \"bhtq\", \"bthq\")\n A_bte = A_bthq.reshape((b, t, h * q))\n return A_bte\ndef _required_padding(dim, target_div):\n if dim % target_div == 0:\n return 0\n else:\n return target_div - dim % target_div",
+ "type": "code",
+ "location": "/lib/xf.py:108-138"
+ },
+ "353": {
+ "file_id": 19,
+ "content": "This code initializes a class with optional nhead and head_dim arguments, and defines preproc_qkv and preproc_r functions to handle input shapes. It also includes a postproc_a function for reshaping the output shape. The _required_padding function checks if padding is needed for certain dimensions.",
+ "type": "comment"
+ },
+ "354": {
+ "file_id": 19,
+ "content": "class StridedAttn(Attn):\n def __init__(self, nhead, stride, maxlen, mask=True):\n super().__init__(mask=mask, maxlen=maxlen)\n self.h = nhead\n self.stride = stride\n def _preproc(self, x, name, Q_t=None, Q_pad=None):\n x, undo = misc.reshape_undo(x, \"b, t*stride, e\", \"b, 1, t, stride*e\", stride=self.stride)\n if name == \"Q\":\n Q_pad = _required_padding(x.shape[2], self.maxlen)\n original_t = x.shape[2]\n x = F.pad(x, (0, 0, 0, Q_pad), value=SENTINEL)\n undo = misc.compose_undo(undo, lambda x: x[:, :, :original_t])\n if name == \"Q\":\n Q_t = x.shape[2]\n assert Q_t % self.maxlen == 0, f\"{Q_t} % {self.maxlen} != 0\"\n else:\n required_len = Q_t + self.maxlen\n if x.shape[2] < required_len:\n x = F.pad(x, (0, 0, required_len - x.shape[2], 0), value=SENTINEL)\n assert x.shape[2] >= required_len\n back = x[:, :, -Q_t - self.maxlen : -self.maxlen]\n front = x[:, :, -Q_t:]",
+ "type": "code",
+ "location": "/lib/xf.py:141-163"
+ },
+ "355": {
+ "file_id": 19,
+ "content": "This code defines a StridedAttn class which is a subclass of Attn. The __init__ method initializes the number of heads, stride, maximum length, and whether or not to use a mask. The _preproc method preprocesses input data by reshaping, padding if necessary, and defining undo operations for later use. It also checks that the query tensor length is divisible by the maximum length.",
+ "type": "comment"
+ },
+ "356": {
+ "file_id": 19,
+ "content": " x = th.cat([back, front], dim=1)\n _, _, t, _ = x.shape\n assert t == Q_t, f\"{t} != {Q_t}\"\n x, undo = misc.reshape_undo(\n x,\n \"b, pad_shift, t*maxlen, stride*h*q\",\n \"b, pad_shift, t, maxlen, stride, h, q\",\n maxlen=self.maxlen,\n h=self.h,\n stride=self.stride,\n undo=undo,\n )\n x, undo = misc.transpose_undo(x, \"bptmshq\", \"bthspmq\", undo=undo)\n x, undo = misc.reshape_undo(\n x,\n \"b, t, h, stride, pad_shift, maxlen, q\",\n \"b*t*h*stride, pad_shift*maxlen, q\",\n undo=undo,\n )\n if name == \"Q\":\n return x, undo, Q_t, Q_pad\n else:\n return x\n def preproc_qkv(self, Q_bte, K_bte, V_bte):\n pad = _required_padding(Q_bte.shape[1], self.stride)\n if pad:\n Q_bte = F.pad(Q_bte, (0, 0, 0, pad), value=SENTINEL)\n K_bte = F.pad(K_bte, (0, 0, 0, pad), value=SENTINEL) if K_bte is not None else None",
+ "type": "code",
+ "location": "/lib/xf.py:164-192"
+ },
+ "357": {
+ "file_id": 19,
+ "content": "This code block preprocesses input data for a deep learning model. It performs operations like concatenation, reshaping, and transposition to prepare the data in a suitable format for further processing. The code also includes padding operations to handle data with different dimensions.",
+ "type": "comment"
+ },
+ "358": {
+ "file_id": 19,
+ "content": " V_bte = F.pad(V_bte, (0, 0, 0, pad), value=SENTINEL) if V_bte is not None else None\n undo = lambda x, pad=pad: x[:, :-pad]\n else:\n undo = None\n if K_bte is not None:\n pad = _required_padding(K_bte.shape[1], self.stride)\n if pad:\n K_bte = F.pad(K_bte, (0, 0, pad, 0), value=SENTINEL)\n V_bte = F.pad(V_bte, (0, 0, pad, 0), value=SENTINEL)\n assert Q_bte.shape[1] % self.stride == 0\n assert K_bte is None or K_bte.shape[1] % self.stride == 0\n assert V_bte is None or V_bte.shape[1] % self.stride == 0\n Q, postproc, Q_t, Q_pad = self._preproc(Q_bte, \"Q\")\n postproc = misc.compose_undo(undo, postproc)\n return (\n postproc,\n Q,\n self._preproc(K_bte, \"K\", Q_t=Q_t, Q_pad=Q_pad) if K_bte is not None else None,\n self._preproc(V_bte, \"V\", Q_t=Q_t, Q_pad=Q_pad) if V_bte is not None else None,\n )\n def preproc_r(self, R_bte):\n _, R, _, _ = self.preproc_qkv(R_bte, None, None)",
+ "type": "code",
+ "location": "/lib/xf.py:193-215"
+ },
+ "359": {
+ "file_id": 19,
+ "content": "This code performs preprocessing for query (Q), key (K), and value (V) tensors in a transformer model. If any of the tensors are None, they are padded with a sentinel value. The function returns preprocessing results including postprocessing operations (postproc) and prepared Q, K, and V tensors for training or inference.",
+ "type": "comment"
+ },
+ "360": {
+ "file_id": 19,
+ "content": " return R\nQ_SCALE = 0.1\nK_SCALE = 0.2\nV_SCALE = 1.0\nPROJ_SCALE = 1.0\nMLP0_SCALE = 1.0\nMLP1_SCALE = 1.0\nR_SCALE = 0.1\nB_SCALE = 0.2\nclass AttentionLayerBase(nn.Module):\n def __init__(\n self,\n *,\n attn,\n scale,\n x_size,\n c_size,\n qk_size,\n v_size,\n dtype,\n relattn=False,\n seqlens=None,\n separate=False,\n ):\n super().__init__()\n dtype = tu.parse_dtype(dtype)\n self.attn = attn\n self.x_size = x_size\n self.c_size = c_size\n s = math.sqrt(scale)\n separgs = dict(seqlens=seqlens, separate=separate)\n self.q_layer = MultiscaleLinear(x_size, qk_size, name=\"q\", scale=Q_SCALE, dtype=dtype, **separgs)\n self.k_layer = MultiscaleLinear(c_size, qk_size, name=\"k\", scale=K_SCALE, bias=False, dtype=dtype, **separgs)\n self.v_layer = MultiscaleLinear(c_size, v_size, name=\"v\", scale=V_SCALE * s, bias=False, dtype=dtype, **separgs)\n self.proj_layer = MultiscaleLinear(v_size, x_size, name=\"proj\", scale=PROJ_SCALE * s, dtype=dtype, **separgs)",
+ "type": "code",
+ "location": "/lib/xf.py:216-254"
+ },
+ "361": {
+ "file_id": 19,
+ "content": "This code defines an `AttentionLayerBase` class that inherits from `nn.Module`. It takes in several parameters such as `attn`, `scale`, `x_size`, `c_size`, `qk_size`, `v_size`, `dtype`, `relattn`, and `seqlens`. Inside the class, it initializes multiple layers using `MultiscaleLinear` with different scales and sizes based on the input parameters. These layers are used for query (Q), key (K), value (V) computations, and projection.",
+ "type": "comment"
+ },
+ "362": {
+ "file_id": 19,
+ "content": " self.relattn = relattn\n maxlen = attn.maxlen\n assert maxlen > 0 or not attn.mask\n if self.relattn:\n nbasis = 10\n self.r_layer = tu.NormedLinear(x_size, nbasis * attn.h, scale=R_SCALE, dtype=dtype)\n self.b_nd = nn.Parameter(th.randn(nbasis, maxlen) * B_SCALE)\n self.maxlen = maxlen\n self.dtype = dtype\n def relattn_logits(self, X_bte, T):\n R_btn = self.r_layer(X_bte).float()\n R_btn = self.attn.preproc_r(R_btn)\n t = R_btn.shape[1]\n D_ntT = util.bandify(self.b_nd, t, T)\n extra_btT = th.einsum(\"btn,ntp->btp\", R_btn, D_ntT)\n return extra_btT\ndef quick_gelu(x):\n return x * th.sigmoid(1.702 * x)\ndef act(actname, x):\n if actname == \"relu\":\n return F.relu(x)\n elif actname == \"gelu\":\n return quick_gelu(x)\n elif actname == \"none\":\n return x\n else:\n raise NotImplementedError(actname)\nclass SelfAttentionLayer(AttentionLayerBase):\n \"\"\"\n Residual attention layer that takes a single tensor x and has it attend to itself",
+ "type": "code",
+ "location": "/lib/xf.py:255-291"
+ },
+ "363": {
+ "file_id": 19,
+ "content": "This code defines a class called SelfAttentionLayer which inherits from AttentionLayerBase. It initializes the relattn attribute, checks if relattn is set, and then initializes r_layer and b_nd if relattn is true. The maxlen, dtype attributes are also initialized based on the input attn. Finally, a relattn_logits method is defined to compute the relative attention logits for the input X_bte and T. Additionally, there are two helper functions: relu, gelu, and none act as activation functions which can be applied to the output of the layer.",
+ "type": "comment"
+ },
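A quick numerical check (not part of the repo) that `quick_gelu`, the sigmoid approximation x * sigmoid(1.702 * x), stays close to PyTorch's exact GELU:

```python
import torch as th
import torch.nn.functional as F

x = th.linspace(-4, 4, steps=101)
approx = x * th.sigmoid(1.702 * x)   # quick_gelu
exact = F.gelu(x)                    # exact (erf-based) GELU

# The approximation is known to agree to within a few hundredths over this range.
assert (approx - exact).abs().max() < 0.03
```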
+ "364": {
+ "file_id": 19,
+ "content": " Has the form\n output = x + f(x)\n \"\"\"\n def __init__(\n self,\n x_size,\n attn,\n scale,\n dtype=\"float32\",\n norm=\"layer\",\n cache_keep_len=None,\n relattn=False,\n log_scope=\"sa\",\n use_muP_factor=False,\n **kwargs,\n ):\n super().__init__(\n x_size=x_size,\n c_size=x_size,\n qk_size=x_size,\n v_size=x_size,\n attn=attn,\n scale=scale,\n relattn=relattn,\n dtype=dtype,\n **kwargs,\n )\n self.ln_x = util.get_norm(norm, x_size, dtype=dtype)\n if cache_keep_len is None:\n if hasattr(attn, \"cache_keep_len\"):\n cache_keep_len = attn.cache_keep_len\n else:\n if isinstance(attn, StridedAttn):\n stride = attn.stride\n else:\n stride = 1\n cache_keep_len = stride * attn.maxlen\n self.cache_keep_len = cache_keep_len\n self.log_scope = log_scope",
+ "type": "code",
+ "location": "/lib/xf.py:292-331"
+ },
+ "365": {
+ "file_id": 19,
+ "content": "This code defines a class constructor for an Attention module. It initializes the object with various parameters and sets up some attributes like normalization layers and cache lengths.",
+ "type": "comment"
+ },
+ "366": {
+ "file_id": 19,
+ "content": " self.use_muP_factor = use_muP_factor\n def residual(self, X_bte, state):\n X_bte = self.ln_x(X_bte)\n Q_bte = self.q_layer(X_bte)\n K_bte = self.k_layer(X_bte)\n V_bte = self.v_layer(X_bte)\n if state:\n state, K_bte, V_bte = self.update_state(state, K_bte, V_bte)\n postproc_closure, Q_bte, K_bte, V_bte = self.attn.preproc_qkv(Q_bte, K_bte, V_bte)\n extra_btT = self.relattn_logits(X_bte, K_bte.shape[1]) if self.relattn else None\n A_bte = attention(\n Q_bte,\n K_bte,\n V_bte,\n mask=self.attn.mask,\n extra_btT=extra_btT,\n maxlen=self.maxlen,\n dtype=self.dtype,\n check_sentinel=isinstance(self.attn, StridedAttn),\n use_muP_factor=self.use_muP_factor,\n )\n A_bte = postproc_closure(A_bte)\n Aproj_bte = self.proj_layer(A_bte)\n return Aproj_bte, state\n def forward(self, X_bte, state):\n R_bte, state = self.residual(X_bte, state)",
+ "type": "code",
+ "location": "/lib/xf.py:332-359"
+ },
+ "367": {
+ "file_id": 19,
+ "content": "This code defines a class with two methods: \"residual\" and \"forward\". The \"residual\" method applies attention to input data, using a self-attention mechanism. It also allows for updating the state based on an argument passed in. The \"forward\" method is a wrapper around the \"residual\" method which also returns the updated state.",
+ "type": "comment"
+ },
+ "368": {
+ "file_id": 19,
+ "content": " return X_bte + R_bte, state\n def stateless_forward(self, X_bte):\n out_bte, _state = self.forward(X_bte, None)\n return out_bte\n def update_state(self, state, K_bte, V_bte):\n def append(prev, new):\n \"\"\"\n Given `prev` keys from cache, and `new` keys,\n returns (cache, full), where\n - cache goes into the output state, length chosen so that on the\n next timestep, there are enough cached timesteps to get the full\n context of lenth self.maxlen.\n - full is used for the current forward pass, with length chosen so\n that the first timestep new[:, 0] gets to see a context of\n self.maxlen.\n \"\"\"\n tprev = prev.shape[1]\n startfull = max(tprev - self.cache_keep_len, 0)\n full = th.cat([prev[:, startfull:], new], dim=1)\n outstate = full[:, max(full.shape[1] - (self.cache_keep_len), 0) :]\n # To see that the preceding slicing is correct, consider the case",
+ "type": "code",
+ "location": "/lib/xf.py:360-382"
+ },
+ "369": {
+ "file_id": 19,
+ "content": "The code defines three functions for a neural network:\n1. `forward` performs the forward pass of the network, taking input X_bte and state as arguments, and returns output and updated state.\n2. `stateless_forward` performs a forward pass without considering the state from the previous timestep, only taking input X_bte as an argument.\n3. `update_state` updates the network's internal state based on current and cached keys (K_bte and V_bte), returning the updated cache and full key matrix for the next timestep.",
+ "type": "comment"
+ },
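A self-contained sketch of the cache bookkeeping performed by `append` above: keep at most `cache_keep_len` past timesteps in the outgoing state, while giving the current chunk that much context (tensor contents are illustrative):

```python
import torch as th

cache_keep_len = 4
prev = th.arange(6.0).reshape(1, 6, 1)        # 6 cached timesteps: 0..5
new = th.arange(6.0, 9.0).reshape(1, 3, 1)    # 3 new timesteps: 6..8

tprev = prev.shape[1]
startfull = max(tprev - cache_keep_len, 0)
full = th.cat([prev[:, startfull:], new], dim=1)             # context for this pass
outstate = full[:, max(full.shape[1] - cache_keep_len, 0):]  # carried to next pass

assert full.shape[1] == cache_keep_len + new.shape[1]        # 4 cached + 3 new steps
assert outstate[0, :, 0].tolist() == [5.0, 6.0, 7.0, 8.0]    # only the last 4 kept
```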
+ "370": {
+ "file_id": 19,
+ "content": " # that maxlen==1. Then `full` only consists of `new`, and\n # `outstate` is empty\n return outstate, full\n instate_K, instate_V = state\n outstate_K, K_bte = append(instate_K, K_bte)\n outstate_V, V_bte = append(instate_V, V_bte)\n assert outstate_K.shape[-2] <= self.cache_keep_len\n return (outstate_K, outstate_V), K_bte, V_bte\n def initial_state(self, batchsize, initial_T=0):\n return (\n tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),\n tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),\n )\n def empty_state(self):\n return None\nclass PointwiseLayer(nn.Module):\n \"\"\"\n Residual MLP applied at each timestep\n \"\"\"\n def __init__(self, x_size, scale, dtype, norm, actname=\"relu\", mlp_ratio=2):\n super().__init__()\n s = math.sqrt(scale)\n self.ln = util.get_norm(norm, x_size, dtype=dtype)\n self.mlp = mlp.MLP(\n insize=x_size,\n nhidlayer=1,",
+ "type": "code",
+ "location": "/lib/xf.py:383-414"
+ },
+ "371": {
+ "file_id": 19,
+ "content": "The code initializes a residual MLP layer with a specified size, scale, data type, normalization method, activation function, and a ratio for the multi-layer perceptron (MLP). The class PointwiseLayer inherits from nn.Module and contains an instance of the Linear layer and a normalization layer, as well as a method to apply the MLP at each timestep in the input sequence.",
+ "type": "comment"
+ },
+ "372": {
+ "file_id": 19,
+ "content": " outsize=x_size,\n hidsize=int(x_size * mlp_ratio),\n hidactiv=functools.partial(act, actname),\n dtype=dtype,\n )\n self.mlp.layers[0].weight.data *= MLP0_SCALE * s\n self.mlp.layers[1].weight.data *= MLP1_SCALE * s\n def residual(self, x):\n x = self.ln(x)\n x = self.mlp(x)\n return x\n def forward(self, x):\n return x + self.residual(x)\ndef _is_separate(sep, name):\n if isinstance(sep, bool):\n return sep\n assert isinstance(sep, set)\n if name in sep:\n sep.remove(name)\n return True\n else:\n return False\ndef make_maybe_multiscale(make_fn, *args, seqlens, separate, name, **kwargs):\n \"\"\"\n This function either creates one instance of a module or creates\n a separate instance of the module for each resolution of the image,\n determined by the `separate` parameter. We create separate modules\n if `separate` is True or if `separate` is a set containing `name`.\n \"\"\"\n if _is_separate(separate, name):",
+ "type": "code",
+ "location": "/lib/xf.py:415-450"
+ },
+ "373": {
+ "file_id": 19,
+ "content": "This code defines a class and a function for creating instances of a module, either for all resolutions or separate instances for each resolution based on the \"separate\" parameter. The class has an initializer that sets up the module's layers and applies scaling to their weights. The forward function performs a residual connection with the module. The _is_separate function checks if a separate instance should be created for the given name, removing it from the set if it should be created separately. The make_maybe_multiscale function creates either one instance or multiple instances of the module based on the separate parameter.",
+ "type": "comment"
+ },
+ "374": {
+ "file_id": 19,
+ "content": " modules = [make_fn(*args, **kwargs) for _ in seqlens]\n return SplitCallJoin(modules, seqlens)\n else:\n return make_fn(*args, **kwargs)\nclass SplitCallJoin(nn.Module):\n def __init__(self, mods, seqlens):\n super().__init__()\n self.mods = nn.ModuleList(mods)\n self.seqlens = seqlens\n def forward(self, x):\n tl = sum(self.seqlens)\n x, undo = misc.reshape_undo(x, \"..., z*tl, e\", \"..., z, tl, e\", tl=tl)\n x = list(th.split(x, self.seqlens, dim=-2))\n new_x = []\n for x, mod in misc.safezip(x, self.mods):\n x, this_undo = misc.reshape_undo(x, \"..., z, l, e\", \"..., z*l, e\")\n x = mod(x)\n x = this_undo(x)\n new_x.append(x)\n x = th.cat(new_x, dim=-2)\n x = undo(x)\n return x\nMultiscaleLinear = functools.partial(make_maybe_multiscale, tu.NormedLinear)\nMultiscalePointwise = functools.partial(make_maybe_multiscale, PointwiseLayer)",
+ "type": "code",
+ "location": "/lib/xf.py:451-479"
+ },
+ "375": {
+ "file_id": 19,
+ "content": "This code defines a function `SplitCallJoin` that takes a list of modules and sequence lengths as inputs. It initializes the `SplitCallJoin` class, which splits the input tensor into multiple smaller tensors based on the sequence lengths, applies each module in parallel, and then concatenates the results back together. The function also defines two partial functions `MultiscaleLinear` and `MultiscalePointwise` using `functools.partial` to create variants of `make_maybe_multiscale` for `tu.NormedLinear` and `PointwiseLayer`, respectively.",
+ "type": "comment"
+ },
+ "376": {
+ "file_id": 20,
+ "content": "/requirements.txt",
+ "type": "filepath"
+ },
+ "377": {
+ "file_id": 20,
+ "content": "Installs necessary libraries: PyTorch, Gym, attrs, and OpenCV Python.",
+ "type": "summary"
+ },
+ "378": {
+ "file_id": 20,
+ "content": "torch==1.9.0\ngym3\nattrs\nopencv-python",
+ "type": "code",
+ "location": "/requirements.txt:1-4"
+ },
+ "379": {
+ "file_id": 20,
+ "content": "Installs necessary libraries: PyTorch, Gym, attrs, and OpenCV Python.",
+ "type": "comment"
+ },
+ "380": {
+ "file_id": 21,
+ "content": "/run_agent.py",
+ "type": "filepath"
+ },
+ "381": {
+ "file_id": 21,
+ "content": "The code imports libraries, defines a function 'main' that loads and uses a pre-trained model in the MineRL environment, taking two arguments: the path to the model file and weights file. It also adds an optional argument \"--model\" of type string for the file path loading.",
+ "type": "summary"
+ },
+ "382": {
+ "file_id": 21,
+ "content": "from argparse import ArgumentParser\nimport pickle\nfrom minerl.herobraine.env_specs.human_survival_specs import HumanSurvival\nfrom agent import MineRLAgent, ENV_KWARGS\ndef main(model, weights):\n env = HumanSurvival(**ENV_KWARGS).make()\n print(\"---Loading model---\")\n agent_parameters = pickle.load(open(model, \"rb\"))\n policy_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n agent = MineRLAgent(env, policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs)\n agent.load_weights(weights)\n print(\"---Launching MineRL enviroment (be patient)---\")\n obs = env.reset()\n while True:\n minerl_action = agent.get_action(obs)\n obs, reward, done, info = env.step(minerl_action)\n env.render()\nif __name__ == \"__main__\":\n parser = ArgumentParser(\"Run pretrained models on MineRL environment\")\n parser.add_argument(\"--weights\", type=str, required=True, help=\"Path to the '.weights' file to be loaded.\")",
+ "type": "code",
+ "location": "/run_agent.py:1-30"
+ },
+ "383": {
+ "file_id": 21,
+ "content": "The code imports necessary libraries and defines a function named \"main\" which loads a pre-trained model, creates an agent, and then launches the MineRL environment. The main function takes two arguments: 'model', the path to the pickle file containing the loaded model's parameters; and 'weights', the path to the '.weights' file to be loaded. It then continuously takes actions in the environment based on the pre-trained agent's recommendations until the MineRL environment is completed or terminated.",
+ "type": "comment"
+ },
+ "384": {
+ "file_id": 21,
+ "content": " parser.add_argument(\"--model\", type=str, required=True, help=\"Path to the '.model' file to be loaded.\")\n args = parser.parse_args()\n main(args.model, args.weights)",
+ "type": "code",
+ "location": "/run_agent.py:31-35"
+ },
+ "385": {
+ "file_id": 21,
+ "content": "This code is adding a required argument \"--model\" to the parser, specifying its type as string and loading the file path from this argument.",
+ "type": "comment"
+ },
+ "386": {
+ "file_id": 22,
+ "content": "/run_inverse_dynamics_model.py",
+ "type": "filepath"
+ },
+ "387": {
+ "file_id": 22,
+ "content": "The code initializes a game dictionary, defines model actions, manages camera resets, handles inputs, loads weights, captures video input, reads JSON data, and displays IDM predictions on a video stream with OpenCV functions.",
+ "type": "summary"
+ },
+ "388": {
+ "file_id": 22,
+ "content": "# NOTE: this is _not_ the original code of IDM!\n# As such, while it is close and seems to function well,\n# its performance might be bit off from what is reported\n# in the paper.\nfrom argparse import ArgumentParser\nimport pickle\nimport cv2\nimport numpy as np\nimport json\nimport torch as th\nfrom agent import ENV_KWARGS\nfrom inverse_dynamics_model import IDMAgent\nKEYBOARD_BUTTON_MAPPING = {\n \"key.keyboard.escape\" :\"ESC\",\n \"key.keyboard.s\" :\"back\",\n \"key.keyboard.q\" :\"drop\",\n \"key.keyboard.w\" :\"forward\",\n \"key.keyboard.1\" :\"hotbar.1\",\n \"key.keyboard.2\" :\"hotbar.2\",\n \"key.keyboard.3\" :\"hotbar.3\",\n \"key.keyboard.4\" :\"hotbar.4\",\n \"key.keyboard.5\" :\"hotbar.5\",\n \"key.keyboard.6\" :\"hotbar.6\",\n \"key.keyboard.7\" :\"hotbar.7\",\n \"key.keyboard.8\" :\"hotbar.8\",\n \"key.keyboard.9\" :\"hotbar.9\",\n \"key.keyboard.e\" :\"inventory\",\n \"key.keyboard.space\" :\"jump\",\n \"key.keyboard.a\" :\"left\",\n \"key.keyboard.d\" :\"right\",\n \"key.keyboard.left.shift\" :\"sneak\",\n \"key.keyboard.left.control\" :\"sprint\",",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:1-36"
+ },
+ "389": {
+ "file_id": 22,
+ "content": "This code is initializing a dictionary mapping keyboard button names to their respective actions in the game. The code is used for controlling the character's movements and actions in the game environment using keyboard inputs.",
+ "type": "comment"
+ },
+ "390": {
+ "file_id": 22,
+ "content": " \"key.keyboard.f\" :\"swapHands\",\n}\n# Template action\nNOOP_ACTION = {\n \"ESC\": 0,\n \"back\": 0,\n \"drop\": 0,\n \"forward\": 0,\n \"hotbar.1\": 0,\n \"hotbar.2\": 0,\n \"hotbar.3\": 0,\n \"hotbar.4\": 0,\n \"hotbar.5\": 0,\n \"hotbar.6\": 0,\n \"hotbar.7\": 0,\n \"hotbar.8\": 0,\n \"hotbar.9\": 0,\n \"inventory\": 0,\n \"jump\": 0,\n \"left\": 0,\n \"right\": 0,\n \"sneak\": 0,\n \"sprint\": 0,\n \"swapHands\": 0,\n \"camera\": np.array([0, 0]),\n \"attack\": 0,\n \"use\": 0,\n \"pickItem\": 0,\n}\nMESSAGE = \"\"\"\nThis script will take a video, predict actions for its frames and\nand show them with a cv2 window.\nPress any button the window to proceed to the next frame.\n\"\"\"\n# Matches a number in the MineRL Java code regarding sensitivity\n# This is for mapping from recorded sensitivity to the one used in the model\nCAMERA_SCALER = 360.0 / 2400.0\ndef json_action_to_env_action(json_action):\n \"\"\"\n Converts a json action into a MineRL action.\n Returns (minerl_action, is_null_action)\n \"\"\"\n # This might be slow...\n env_action = NOOP_ACTION.copy()",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:37-86"
+ },
+ "391": {
+ "file_id": 22,
+ "content": "This code defines a set of actions that the model should predict for a given video. It also includes a template action and a message to be displayed with a cv2 window. The CAMERA_SCALER is used for mapping sensitivity from recorded Java code to the one used in the model. The json_action_to_env_action function converts a JSON action into a MineRL action.",
+ "type": "comment"
+ },
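A hypothetical recorded step showing what `json_action_to_env_action` produces (this assumes the repo root is importable along with the script's dependencies; the input values are made up):

```python
import numpy as np
from run_inverse_dynamics_model import json_action_to_env_action

# Holding "w" while moving the mouse right and slightly up.
json_action = {
    "keyboard": {"keys": ["key.keyboard.w"]},
    "mouse": {"dx": 40.0, "dy": -10.0, "buttons": []},
}

env_action, is_null = json_action_to_env_action(json_action)
assert env_action["forward"] == 1
assert np.allclose(env_action["camera"], [-10.0 * 360 / 2400, 40.0 * 360 / 2400])
assert not is_null   # any mapped key press or mouse movement makes the action non-null
```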
+ "392": {
+ "file_id": 22,
+ "content": " # As a safeguard, make camera action again so we do not override anything\n env_action[\"camera\"] = np.array([0, 0])\n is_null_action = True\n keyboard_keys = json_action[\"keyboard\"][\"keys\"]\n for key in keyboard_keys:\n # You can have keys that we do not use, so just skip them\n # NOTE in original training code, ESC was removed and replaced with\n # \"inventory\" action if GUI was open.\n # Not doing it here, as BASALT uses ESC to quit the game.\n if key in KEYBOARD_BUTTON_MAPPING:\n env_action[KEYBOARD_BUTTON_MAPPING[key]] = 1\n is_null_action = False\n mouse = json_action[\"mouse\"]\n camera_action = env_action[\"camera\"]\n camera_action[0] = mouse[\"dy\"] * CAMERA_SCALER\n camera_action[1] = mouse[\"dx\"] * CAMERA_SCALER\n if mouse[\"dx\"] != 0 or mouse[\"dy\"] != 0:\n is_null_action = False\n else:\n if abs(camera_action[0]) > 180:\n camera_action[0] = 0\n if abs(camera_action[1]) > 180:\n camera_action[1] = 0",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:87-112"
+ },
+ "393": {
+ "file_id": 22,
+ "content": "This code resets the camera action to avoid overriding other actions and handles keyboard and mouse inputs for the environment.",
+ "type": "comment"
+ },
+ "394": {
+ "file_id": 22,
+ "content": " mouse_buttons = mouse[\"buttons\"]\n if 0 in mouse_buttons:\n env_action[\"attack\"] = 1\n is_null_action = False\n if 1 in mouse_buttons:\n env_action[\"use\"] = 1\n is_null_action = False\n if 2 in mouse_buttons:\n env_action[\"pickItem\"] = 1\n is_null_action = False\n return env_action, is_null_action\ndef main(model, weights, video_path, json_path, n_batches, n_frames):\n print(MESSAGE)\n agent_parameters = pickle.load(open(model, \"rb\"))\n net_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n agent = IDMAgent(idm_net_kwargs=net_kwargs, pi_head_kwargs=pi_head_kwargs)\n agent.load_weights(weights)\n required_resolution = ENV_KWARGS[\"resolution\"]\n cap = cv2.VideoCapture(video_path)\n json_index = 0\n with open(json_path) as json_file:\n json_lines = json_file.readlines()\n json_data = \"[\" + \",\".join(json_lines) + \"]\"",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:114-143"
+ },
+ "395": {
+ "file_id": 22,
+ "content": "This code handles mouse button events and initializes an inverse dynamics model agent for a game. It loads the agent's weights from a file, captures video input, and reads a JSON file containing game data.",
+ "type": "comment"
+ },
+ "396": {
+ "file_id": 22,
+ "content": " json_data = json.loads(json_data)\n for _ in range(n_batches):\n th.cuda.empty_cache()\n print(\"=== Loading up frames ===\")\n frames = []\n recorded_actions = []\n for _ in range(n_frames):\n ret, frame = cap.read()\n if not ret:\n break\n assert frame.shape[0] == required_resolution[1] and frame.shape[1] == required_resolution[0], \"Video must be of resolution {}\".format(required_resolution)\n # BGR -> RGB\n frames.append(frame[..., ::-1])\n env_action, _ = json_action_to_env_action(json_data[json_index])\n recorded_actions.append(env_action)\n json_index += 1\n frames = np.stack(frames)\n print(\"=== Predicting actions ===\")\n predicted_actions = agent.predict_actions(frames)\n for i in range(n_frames):\n frame = frames[i]\n recorded_action = recorded_actions[i]\n cv2.putText(\n frame,\n f\"name: prediction (true)\",",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:144-170"
+ },
+ "397": {
+ "file_id": 22,
+ "content": "Loading and preprocessing video frames, converting actions from JSON to environment actions, predicting actions using the agent model, and displaying predictions on video frames.",
+ "type": "comment"
+ },
+ "398": {
+ "file_id": 22,
+ "content": " (10, 10),\n cv2.FONT_HERSHEY_SIMPLEX,\n 0.4,\n (255, 255, 255),\n 1\n )\n for y, (action_name, action_array) in enumerate(predicted_actions.items()):\n current_prediction = action_array[0, i]\n cv2.putText(\n frame,\n f\"{action_name}: {current_prediction} ({recorded_action[action_name]})\",\n (10, 25 + y * 12),\n cv2.FONT_HERSHEY_SIMPLEX,\n 0.35,\n (255, 255, 255),\n 1\n )\n # RGB -> BGR again...\n cv2.imshow(\"MineRL IDM model predictions\", frame[..., ::-1])\n cv2.waitKey(0)\n cv2.destroyAllWindows()\nif __name__ == \"__main__\":\n parser = ArgumentParser(\"Run IDM on MineRL recordings.\")\n parser.add_argument(\"--weights\", type=str, required=True, help=\"Path to the '.weights' file to be loaded.\")\n parser.add_argument(\"--model\", type=str, required=True, help=\"Path to the '.model' file to be loaded.\")",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:171-197"
+ },
+ "399": {
+ "file_id": 22,
+ "content": "The code is displaying IDM model predictions on a video stream, with text labels for each action. It uses OpenCV's putText function to draw the labels on the frame and then displays the resulting image using cv2.imshow and waitKey functions. The code also takes arguments for weights and model files required to load the model.",
+ "type": "comment"
+ }
+}
\ No newline at end of file
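The chunks above describe how a recorded step is turned into a MineRL action: keyboard keys become binary buttons and mouse dx/dy are scaled by CAMERA_SCALER (360/2400) into camera pitch and yaw. The following is a minimal, self-contained sketch of that pattern, not the repository code; the abbreviated key mapping and the example step are purely illustrative.

```python
import numpy as np

CAMERA_SCALER = 360.0 / 2400.0
# Abbreviated, illustrative key mapping (the real KEYBOARD_BUTTON_MAPPING covers many more keys).
KEY_TO_BUTTON = {"key.keyboard.w": "forward", "key.keyboard.space": "jump"}

def step_to_action(step):
    """Convert one recorded step into a MineRL-style action dict (sketch)."""
    action = {name: 0 for name in KEY_TO_BUTTON.values()}
    action["camera"] = np.zeros(2)
    is_null = True
    for key in step["keyboard"]["keys"]:
        if key in KEY_TO_BUTTON:  # unknown keys are simply skipped
            action[KEY_TO_BUTTON[key]] = 1
            is_null = False
    mouse = step["mouse"]
    action["camera"][0] = mouse["dy"] * CAMERA_SCALER  # pitch from dy
    action["camera"][1] = mouse["dx"] * CAMERA_SCALER  # yaw from dx
    if mouse["dx"] != 0 or mouse["dy"] != 0:
        is_null = False
    return action, is_null

example_step = {"keyboard": {"keys": ["key.keyboard.w"]}, "mouse": {"dx": 24, "dy": 0}}
print(step_to_action(example_step))
```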
diff --git a/docs/data/4.json b/docs/data/4.json
new file mode 100644
index 0000000..9700acf
--- /dev/null
+++ b/docs/data/4.json
@@ -0,0 +1,13 @@
+{
+ "400": {
+ "file_id": 22,
+ "content": " parser.add_argument(\"--video-path\", type=str, required=True, help=\"Path to a .mp4 file (Minecraft recording).\")\n parser.add_argument(\"--jsonl-path\", type=str, required=True, help=\"Path to a .jsonl file (Minecraft recording).\")\n parser.add_argument(\"--n-frames\", type=int, default=128, help=\"Number of frames to process at a time.\")\n parser.add_argument(\"--n-batches\", type=int, default=10, help=\"Number of batches (n-frames) to process for visualization.\")\n args = parser.parse_args()\n main(args.model, args.weights, args.video_path, args.jsonl_path, args.n_batches, args.n_frames)",
+ "type": "code",
+ "location": "/run_inverse_dynamics_model.py:198-205"
+ },
+ "401": {
+ "file_id": 22,
+ "content": "This code sets up command line arguments for video path, JSONL file path, number of frames to process at a time, and the number of batches to process for visualization. It then parses these arguments into \"args\" and calls the main function with these arguments.",
+ "type": "comment"
+ }
+}
\ No newline at end of file
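For orientation, a hypothetical invocation of the script whose argument parser is documented above; every file name below is a placeholder for files the user supplies.

```python
import subprocess

# All paths are placeholders, not files shipped with the repository.
subprocess.run(
    [
        "python", "run_inverse_dynamics_model.py",
        "--weights", "4x_idm.weights",      # placeholder
        "--model", "4x_idm.model",          # placeholder
        "--video-path", "recording.mp4",    # placeholder
        "--jsonl-path", "recording.jsonl",  # placeholder
        "--n-frames", "128",
        "--n-batches", "10",
    ],
    check=True,
)
```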
diff --git a/docs/data/titles/0.json b/docs/data/titles/0.json
new file mode 100644
index 0000000..ac19532
--- /dev/null
+++ b/docs/data/titles/0.json
@@ -0,0 +1,201 @@
+{
+ "/README.md": "Minecraft AI Competition and House Building",
+ "/README.md:101-114": "Video Pre-Training Demonstration",
+ "/README.md:116-136": "Major Limitations and Versions Used",
+ "/README.md:136-152": "Minecraft AI Model Training Versions",
+ "/README.md:153-168": "Minecraft AI: Enhancing Performance through Early Data",
+ "/README.md:168-182": "Simple Wooden House Building Guide",
+ "/README.md:182-194": "Timed Minecraft Demo: House Building Challenge",
+ "/README.md:196-215": "Time-Based Task Completion",
+ "/README.md:216-237": "Random Starting Inventory Generator",
+ "/README.md:238-250": "Random Biome-based Hotbar Items",
+ "/README.md:251-263": "Randomized Inventory Population",
+ "/README.md:264-288": "Sunflower Inventory Boost",
+ "/README.md:288-304": "Minecraft Diamonds and Version Updates",
+ "/README.md:3-35": "Video PreTraining Model Setup and Usage",
+ "/README.md:304-311": "Minecraft Testing Guidelines",
+ "/README.md:312-338": "Demonstration Format and Index",
+ "/README.md:340-388": "Action and File Structure for Data Loader",
+ "/README.md:37-51": "Pre-Trained Minecraft RL Models",
+ "/README.md:389-418": "JSON Game State Data for Minecraft",
+ "/README.md:420-439": "Cave Hunt and Waterfall Snapshot",
+ "/README.md:440-451": "Village-Friendly Animal Pen Building Prompt",
+ "/README.md:451-467": "AI Minecraft Tasks: Construct, Protect, and Tour in 5 Minutes",
+ "/README.md:467-487": "Eco-Friendly Minecraft Village Building",
+ "/README.md:488-497": "Acknowledging Authors & Crediting Anssi Kanervisto",
+ "/README.md:52-65": "Pre-Trained Minecraft Models and Fine-Tuning",
+ "/README.md:66-85": "Pre-Trained Minecraft Models for Faster Tech Tree Progress",
+ "/README.md:86-99": "IDM Demonstration with Contractor Recordings",
+ "/agent.py": "Reinforcement Learning Minecraft Agent Setup",
+ "/agent.py:1-40": "Minecraft Agent Configuration",
+ "/agent.py:102-128": "MineRL Agent Image Resizing",
+ "/agent.py:129-154": "Agent Class and Methods",
+ "/agent.py:155-175": "Action Transformation and Conversion in MineRL",
+ "/agent.py:176-199": "Observation-Action Class",
+ "/agent.py:200-206": "Agent's Policy Action Selection",
+ "/agent.py:41-77": "Game Agent Parameter Settings",
+ "/agent.py:78-101": "MineRL Environment Validation and Setup",
+ "/behavioural_cloning.py": "Agent-Based Actor-Critic Training in Behavioral Cloning",
+ "/behavioural_cloning.py:1-34": "Behavioral Cloning: Fine-Tuning with Gradient Accumulation",
+ "/behavioural_cloning.py:115-139": "Training Policy Model with Behavioral Cloning",
+ "/behavioural_cloning.py:140-143": "Command Line Parsing for Behavioral Cloning Training",
+ "/behavioural_cloning.py:36-60": "Load and Configure Agent Object",
+ "/behavioural_cloning.py:62-91": "Training Environment Setup",
+ "/behavioural_cloning.py:92-114": "Policy-Based Actor-Critic Environment Setup",
+ "/data_loader.py": "Batch Data Loader with Processes",
+ "/data_loader.py:1-40": "MineRL VPT Data Loader",
+ "/data_loader.py:114-134": "Compositing and Resizing Cursors",
+ "/data_loader.py:135-155": "Sub-Inefficient Sequence Loader",
+ "/data_loader.py:156-178": "Video Data Loader for Shuffled Demonstrations",
+ "/data_loader.py:179-209": "Batch Data Loading Workers",
+ "/data_loader.py:210-222": "Batch Data Processor",
+ "/data_loader.py:41-67": "Video Game Annotation Extractor",
+ "/data_loader.py:68-91": "Scrollwheel-based Attack Unsticking",
+ "/data_loader.py:92-113": "Stuck State Detection and Action Removal",
+ "/inverse_dynamics_model.py": "IDMAgent: Minecraft Predictor",
+ "/inverse_dynamics_model.py:1-33": "IDMAgent: Minecraft Action Predictor",
+ "/inverse_dynamics_model.py:34-56": "Agent Initialization and Weight Loading",
+ "/inverse_dynamics_model.py:57-79": "Policy to MineRL Action Conversion",
+ "/inverse_dynamics_model.py:80-95": "Agent State Tracking and Action Prediction",
+ "/lib/action_head.py": "Abstract Action Head Class for RL Action Spaces",
+ "/lib/action_head.py:1-36": "Fan-In Initialized Action Heads",
+ "/lib/action_head.py:120-151": "Categorical Action Head Initialization",
+ "/lib/action_head.py:152-174": "Action Head Class Definition and Forward Function",
+ "/lib/action_head.py:176-196": "Functions for Action Distributions",
+ "/lib/action_head.py:197-217": "Gumbel-Softmax for Categorical Training",
+ "/lib/action_head.py:218-243": "DictActionHead Class: Forward Pass and Parameter Reset",
+ "/lib/action_head.py:244-264": "Action Head Class: Supporting Methods",
+ "/lib/action_head.py:265-275": "ActionHead Selection",
+ "/lib/action_head.py:37-63": "Abstract Base Class for Action Heads",
+ "/lib/action_head.py:64-89": "Orthogonal Initialization for Action Head",
+ "/lib/action_head.py:91-119": "Calculate Entropy, Z-Score, and KL Divergence",
+ "/lib/action_mapping.py": "Action Mapping and Management",
+ "/lib/action_mapping.py:1-32": "Action Mapping Class for Minecraft Customization",
+ "/lib/action_mapping.py:124-142": "Camera Meta Actions Mapping",
+ "/lib/action_mapping.py:143-161": "Button and Camera Array Mapping Setup",
+ "/lib/action_mapping.py:162-180": "Factored Action-Camera Combinations",
+ "/lib/action_mapping.py:181-200": "Assertion Check and Action Mapping",
+ "/lib/action_mapping.py:201-223": "Factoring New Action Space",
+ "/lib/action_mapping.py:225-234": "Class with Three Methods Defined",
+ "/lib/action_mapping.py:33-64": "Abstract Base Class for Action Mapping",
+ "/lib/action_mapping.py:65-82": "Button Selection from Factored Action Space",
+ "/lib/action_mapping.py:83-97": "Priority-based Action Mapping for 'None' Button",
+ "/lib/action_mapping.py:98-122": "Action Mapping Classes for Video Games",
+ "/lib/actions.py": "Action-based Quantization in Minecraft",
+ "/lib/actions.py:1-54": "Minecraft Action and Camera Quantization",
+ "/lib/actions.py:132-160": "Functions: Item to Name Conversion, Dict to Numpy, and Numpy to Dict",
+ "/lib/actions.py:161-178": "Camera Array Transformations",
+ "/lib/actions.py:55-68": "Mu-law and Linear Quantization Schemes",
+ "/lib/actions.py:69-95": "Discretize and Undiscretize Values with Class",
+ "/lib/actions.py:97-130": "Action Transformer Class for MinerL Environment",
+ "/lib/impala_cnn.py": "Customizable ImpalaCNN Architecture",
+ "/lib/impala_cnn.py:1-42": "ImpalaCNN: Residual Basic Blocks",
+ "/lib/impala_cnn.py:108-139": "ImpalaCNN Model Architecture",
+ "/lib/impala_cnn.py:140-171": "ImpalaCNN: Custom Convolutional Class",
+ "/lib/impala_cnn.py:172-195": "Stacked 2D Convolutional CNN Model",
+ "/lib/impala_cnn.py:43-78": "ImpalaCNN: 1D Conv Layers for Image Classification",
+ "/lib/impala_cnn.py:79-107": "ImpalaCNN: Customizable Convolutional Neural Network",
+ "/lib/masked_attention.py": "Masked Attention Mechanism for Time Series",
+ "/lib/masked_attention.py:1-31": "Band Diagonal Time Series Mask",
+ "/lib/masked_attention.py:104-113": "Masked Attention Mechanism Parameters",
+ "/lib/masked_attention.py:114-147": "Masked Attention Initialization",
+ "/lib/masked_attention.py:148-173": "Masked Attention Class and Forward Pass",
+ "/lib/masked_attention.py:174-182": "Masked Attention Method",
+ "/lib/masked_attention.py:32-54": "Generate Boolean Mask for Masked Attention",
+ "/lib/masked_attention.py:55-73": "Update State Mask for Masked Attention",
+ "/lib/masked_attention.py:75-102": "Dynamic Episode Masking in Transformer Layers",
+ "/lib/minecraft_util.py": "Average Entropy Calculator",
+ "/lib/minecraft_util.py:1-32": "Decorator for Storing Method Arguments",
+ "/lib/minecraft_util.py:34-55": "Normalized Entropy Calculation",
+ "/lib/minecraft_util.py:56-79": "Calculate Entropy of Gaussian Action Heads",
+ "/lib/minecraft_util.py:80-88": "Average Entropy from DiagGaussianActionHead/DictActionHead",
+ "/lib/misc.py": "Data Processing Tasks: Calculating Products and Reshaping Input Data",
+ "/lib/misc.py:1-43": "Data Manipulation Functions",
+ "/lib/misc.py:119-157": "Sort Symbols in List",
+ "/lib/misc.py:158-187": "Check and Update Product of List Values",
+ "/lib/misc.py:188-223": "Shape Reshaping and Exception Handling",
+ "/lib/misc.py:224-240": "Input Validation and Shape Inference",
+ "/lib/misc.py:241-263": "Utility Functions in lib/misc.py:241-263",
+ "/lib/misc.py:44-89": "Transpose and Compose Functions",
+ "/lib/misc.py:90-118": "Handling Int and Str Calculations in Function",
+ "/lib/mlp.py": "MLP Class: Neural Network Architecture",
+ "/lib/normalize_ewma.py": "Efficient Normalization with Debiased Means",
+ "/lib/normalize_ewma.py:1-28": "Exponential Weighted Normalization Module",
+ "/lib/normalize_ewma.py:29-51": "Debiased Mean and Variance Calculator",
+ "/lib/normalize_ewma.py:52-60": "Efficient Data Normalization and EWMA Calculation",
+ "/lib/policy.py": "Minecraft Reinforcement Learning with PyTorch",
+ "/lib/policy.py:1-32": "Normalizing Images with ImgPreprocessing Class",
+ "/lib/policy.py:123-150": "Class Initialization and Attributes",
+ "/lib/policy.py:151-178": "Layer Normalization Setup",
+ "/lib/policy.py:179-208": "Initializing Recurrent Module and Forward Pass",
+ "/lib/policy.py:209-238": "MinecraftAgentPolicy Class Definition",
+ "/lib/policy.py:240-269": "Neural Policy Decisions Class",
+ "/lib/policy.py:271-299": "Policy Network Functions",
+ "/lib/policy.py:300-323": "Time Dimension Policy Evaluation",
+ "/lib/policy.py:324-357": "InverseActionNet: MinecraftPolicy Subclass",
+ "/lib/policy.py:33-62": "ImgObsPreprocessing Class",
+ "/lib/policy.py:358-386": "3D Convolution Layer Initialization",
+ "/lib/policy.py:388-424": "Inverse Action Policy Class",
+ "/lib/policy.py:425-457": "Training RL Policy with PyTorch",
+ "/lib/policy.py:458-467": "Log Probability Action Check",
+ "/lib/policy.py:63-91": "Policy Class with CNN and Layer Initialization",
+ "/lib/policy.py:92-122": "Deep Reinforcement Learning Policy Initializer",
+ "/lib/scaled_mse_head.py": "Scaled MSE Loss for Linear Layer",
+ "/lib/scaled_mse_head.py:1-35": "Scaled MSE Head Layer: Normalizing Linear Outputs",
+ "/lib/scaled_mse_head.py:37-50": "Scaled MSE Loss with Normalization/Denormalization",
+ "/lib/torch_util.py": "Efficient PyTorch Library Management",
+ "/lib/torch_util.py:1-57": "Torch CUDA Utilities",
+ "/lib/torch_util.py:132-165": "PyTorch Decorator for Model State Saving",
+ "/lib/torch_util.py:166-199": "PyTorch Tensor Data Type Parser",
+ "/lib/torch_util.py:200-200": "Squeeze Tensor Dimension",
+ "/lib/torch_util.py:60-96": "Linear and LayerNorm Functions in Torch Utils",
+ "/lib/torch_util.py:97-131": "LayerNorm, Flatten, Sequential, Load Avg with Metadata",
+ "/lib/tree_util.py": "Tree Utilities for PyTree API",
+ "/lib/tree_util.py:1-35": "Utility Functions for Dictionary, Tuple, and List Manipulation",
+ "/lib/tree_util.py:120-141": "Validate Tree Structure in Python",
+ "/lib/tree_util.py:142-171": "Tree Utilities: Building and Unflattening Trees",
+ "/lib/tree_util.py:172-201": "PyTreeDef: Tree Nodes Definition and Operations",
+ "/lib/tree_util.py:203-256": "Tree-like Object Serialization Class",
+ "/lib/tree_util.py:259-280": "Serializing Complex Data Types with PyTree API",
+ "/lib/tree_util.py:36-70": "Safe List Operations and Tree Mapping",
+ "/lib/tree_util.py:71-93": "Multi-Input Function Applied to Pytree Leaves",
+ "/lib/tree_util.py:95-119": "Tree Node Type Determination and Iteration",
+ "/lib/util.py": "Neural Network Utilities",
+ "/lib/util.py:1-30": "Recursive Module Key Collection and Fan-in ReLU Layer",
+ "/lib/util.py:127-161": "Residual Recurrent Block Initialization",
+ "/lib/util.py:162-184": "Layer Normalization and MLP Initialization",
+ "/lib/util.py:185-216": "Transformer Block Recurrent Pass",
+ "/lib/util.py:217-253": "Reversing LSTM Input Sequences",
+ "/lib/util.py:254-276": "Band-Repeat Normalization",
+ "/lib/util.py:31-62": "Dynamic Normalization Function Initialization",
+ "/lib/util.py:64-94": "Residual Recurrent Blocks for Neural Network Stability",
+ "/lib/util.py:95-126": "ResidualRecurrentBlock Class",
+ "/lib/xf.py": "Strided Attention Mechanism for Transformers",
+ "/lib/xf.py:1-43": "Softmax Attention Function",
+ "/lib/xf.py:108-138": "XF Class Initialization and Preprocessing",
+ "/lib/xf.py:141-163": "StridedAttn: Preprocessing and Initialization",
+ "/lib/xf.py:164-192": "Data Preprocessing for Deep Learning Model",
+ "/lib/xf.py:193-215": "Transformer Preprocessing Function",
+ "/lib/xf.py:216-254": "Attention Layer Base Class",
+ "/lib/xf.py:255-291": "Self-Attention Layer Implementation",
+ "/lib/xf.py:292-331": "Attention Module Constructor",
+ "/lib/xf.py:332-359": "Attention Residual Class",
+ "/lib/xf.py:360-382": "Neural Network Forward, Stateless Forward & State Update Functions",
+ "/lib/xf.py:383-414": "Residual MLP Layer Initialization",
+ "/lib/xf.py:415-450": "ResNet Multi-Scale Module",
+ "/lib/xf.py:44-68": "Multi-Head Attention Logit Calculation",
+ "/lib/xf.py:451-479": "Split-Apply-Combine with Multiscale Layers",
+ "/lib/xf.py:69-107": "Attention Mechanism Class and Split Heads Function",
+ "/requirements.txt": "PyTorch, Gym, attrs, OpenCV Installation Guide",
+ "/run_agent.py": "Run Agent Model",
+ "/run_agent.py:1-30": "Train MineRL Agent with Pre-loaded Model",
+ "/run_agent.py:31-35": "Argument Parser Model File Loading",
+ "/run_inverse_dynamics_model.py": "Inverse Dynamics Model Game Engine",
+ "/run_inverse_dynamics_model.py:1-36": "Key-Action Mapping Dictionary for Game Controls",
+ "/run_inverse_dynamics_model.py:114-143": "Loading Agent and Capturing Video Input",
+ "/run_inverse_dynamics_model.py:144-170": "Inverse Dynamics Model Execution",
+ "/run_inverse_dynamics_model.py:171-197": "Real-Time IDM Model Predictions on Videos",
+ "/run_inverse_dynamics_model.py:198-205": "Command Line Arguments Parser",
+ "/run_inverse_dynamics_model.py:37-86": "Inverse Dynamics Model Training",
+ "/run_inverse_dynamics_model.py:87-112": "Reset Camera Action & Input Handler"
+}
\ No newline at end of file
diff --git a/docs/doc/070cac4d-21b0-4008-9110-5d80d6a8f90a.json b/docs/doc/070cac4d-21b0-4008-9110-5d80d6a8f90a.json
new file mode 100644
index 0000000..923d984
--- /dev/null
+++ b/docs/doc/070cac4d-21b0-4008-9110-5d80d6a8f90a.json
@@ -0,0 +1,45 @@
+{
+ "summary": "The code establishes Minecraft agent settings for a reinforcement learning project, including environment configuration and an agent class with action mapping, transforming, policy-making capabilities, and device support for actions.",
+ "details": [
+ {
+ "comment": "The code imports necessary libraries and defines various settings for a Minecraft agent. It includes hardcoded resolution, policy arguments, action transformer arguments, and other configuration options. The code seems to be part of a larger project involving reinforcement learning and a specific environment (Minecraft in this case).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":0-39",
+ "content": "import numpy as np\nimport torch as th\nimport cv2\nfrom gym3.types import DictType\nfrom gym import spaces\nfrom lib.action_mapping import CameraHierarchicalMapping\nfrom lib.actions import ActionTransformer\nfrom lib.policy import MinecraftAgentPolicy\nfrom lib.torch_util import default_device_type, set_default_torch_device\n# Hardcoded settings\nAGENT_RESOLUTION = (128, 128)\nPOLICY_KWARGS = dict(\n attention_heads=16,\n attention_mask_style=\"clipped_causal\",\n attention_memory_size=256,\n diff_mlp_embedding=False,\n hidsize=2048,\n img_shape=[128, 128, 3],\n impala_chans=[16, 32, 32],\n impala_kwargs={\"post_pool_groups\": 1},\n impala_width=8,\n init_norm_kwargs={\"batch_norm\": False, \"group_norm_groups\": 1},\n n_recurrence_layers=4,\n only_img_input=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n recurrence_is_residual=True,\n recurrence_type=\"transformer\",\n timesteps=128,\n use_pointwise_layer=True,\n use_pre_lstm_ln=False,\n)\nPI_HEAD_KWARGS = dict(temperature=2.0)\nACTION_TRANSFORMER_KWARGS = dict("
+ },
+ {
+ "comment": "This code sets various parameters for a game agent. It defines action space, environment keywords, and camera settings for the game environment. The action space includes actions like ESC, attack, back, camera adjustment, dropping items, moving, using hotbar slots, inventory management, jumping, moving left/right, and sneaking. Environment keywords specify fields such as field of view (fov_range), frameskip, gamma value range, gui scale range, game resolution, and cursor size range. Camera settings include bin size, maximum value, mean value, and quantization scheme for the camera input.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":40-76",
+ "content": " camera_binsize=2,\n camera_maxval=10,\n camera_mu=10,\n camera_quantization_scheme=\"mu_law\",\n)\nENV_KWARGS = dict(\n fov_range=[70, 70],\n frameskip=1,\n gamma_range=[2, 2],\n guiscale_range=[1, 1],\n resolution=[640, 360],\n cursor_size_range=[16.0, 16.0],\n)\nTARGET_ACTION_SPACE = {\n \"ESC\": spaces.Discrete(2),\n \"attack\": spaces.Discrete(2),\n \"back\": spaces.Discrete(2),\n \"camera\": spaces.Box(low=-180.0, high=180.0, shape=(2,)),\n \"drop\": spaces.Discrete(2),\n \"forward\": spaces.Discrete(2),\n \"hotbar.1\": spaces.Discrete(2),\n \"hotbar.2\": spaces.Discrete(2),\n \"hotbar.3\": spaces.Discrete(2),\n \"hotbar.4\": spaces.Discrete(2),\n \"hotbar.5\": spaces.Discrete(2),\n \"hotbar.6\": spaces.Discrete(2),\n \"hotbar.7\": spaces.Discrete(2),\n \"hotbar.8\": spaces.Discrete(2),\n \"hotbar.9\": spaces.Discrete(2),\n \"inventory\": spaces.Discrete(2),\n \"jump\": spaces.Discrete(2),\n \"left\": spaces.Discrete(2),\n \"pickItem\": spaces.Discrete(2),\n \"right\": spaces.Discrete(2),\n \"sneak\": spaces.Discrete(2),"
+ },
+ {
+ "comment": "This code defines a dictionary ENV_KWARGS that contains expected environment settings for the MineRL environment. It also includes a function validate_env() to check if the environment is set up correctly and raise an error if any setting does not match the expected value. Additionally, it checks if the action space of the MineRL environment matches the expected actions and their respective spaces. The code also provides a comment suggesting to resize images using INTER_LINEAR resizing function for better results.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":77-100",
+ "content": " \"sprint\": spaces.Discrete(2),\n \"swapHands\": spaces.Discrete(2),\n \"use\": spaces.Discrete(2)\n}\ndef validate_env(env):\n \"\"\"Check that the MineRL environment is setup correctly, and raise if not\"\"\"\n for key, value in ENV_KWARGS.items():\n if key == \"frameskip\":\n continue\n if getattr(env.task, key) != value:\n raise ValueError(f\"MineRL environment setting {key} does not match {value}\")\n action_names = set(env.action_space.spaces.keys())\n if action_names != set(TARGET_ACTION_SPACE.keys()):\n raise ValueError(f\"MineRL action space does match. Expected actions {set(TARGET_ACTION_SPACE.keys())}\")\n for ac_space_name, ac_space_space in TARGET_ACTION_SPACE.items():\n if env.action_space.spaces[ac_space_name] != ac_space_space:\n raise ValueError(f\"MineRL action space setting {ac_space_name} does not match {ac_space_space}\")\ndef resize_image(img, target_resolution):\n # For your sanity, do not resize with any function than INTER_LINEAR"
+ },
+ {
+ "comment": "This code defines a MineRLAgent class with an __init__ method. It resizes the image using cv2.resize and returns it. The class has attributes for action_mapper, action_transformer, policy, and device. The policy is created with given policy_kwargs, pi_head_kwargs, and action_space. The device is set as the default torch device.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":101-127",
+ "content": " img = cv2.resize(img, target_resolution, interpolation=cv2.INTER_LINEAR)\n return img\nclass MineRLAgent:\n def __init__(self, env, device=None, policy_kwargs=None, pi_head_kwargs=None):\n validate_env(env)\n if device is None:\n device = default_device_type()\n self.device = th.device(device)\n # Set the default torch device for underlying code as well\n set_default_torch_device(self.device)\n self.action_mapper = CameraHierarchicalMapping(n_camera_bins=11)\n action_space = self.action_mapper.get_action_space_update()\n action_space = DictType(**action_space)\n self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)\n if policy_kwargs is None:\n policy_kwargs = POLICY_KWARGS\n if pi_head_kwargs is None:\n pi_head_kwargs = PI_HEAD_KWARGS\n agent_kwargs = dict(policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)\n self.policy = MinecraftAgentPolicy(**agent_kwargs).to(device)"
+ },
+ {
+ "comment": "The code defines a class for an agent with methods to reset its hidden state, convert MineRL observations into the model's observation format, and convert policy output into actions for the MineRL environment. It also includes a method to load model weights from a given path while resetting the hidden state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":128-153",
+ "content": " self.hidden_state = self.policy.initial_state(1)\n self._dummy_first = th.from_numpy(np.array((False,))).to(device)\n def load_weights(self, path):\n \"\"\"Load model weights from a path, and reset hidden state\"\"\"\n self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)\n self.reset()\n def reset(self):\n \"\"\"Reset agent to initial state (i.e., reset hidden state)\"\"\"\n self.hidden_state = self.policy.initial_state(1)\n def _env_obs_to_agent(self, minerl_obs):\n \"\"\"\n Turn observation from MineRL environment into model's observation\n Returns torch tensors.\n \"\"\"\n agent_input = resize_image(minerl_obs[\"pov\"], AGENT_RESOLUTION)[None]\n agent_input = {\"img\": th.from_numpy(agent_input).to(self.device)}\n return agent_input\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # This is quite important step (for some reason).\n # For the sake of your sanity, remember to do this step (manual conversion to numpy)"
+ },
+ {
+ "comment": "This code snippet is part of a function that transforms actions from MineRL format to the model's action format and vice versa. It also handles the conversion between PyTorch tensors and numpy arrays, and checks if the action is null (no action).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":154-174",
+ "content": " # before proceeding. Otherwise, your agent might be a little derp.\n action = agent_action\n if isinstance(action[\"buttons\"], th.Tensor):\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n minerl_action = self.action_mapper.to_factored(action)\n minerl_action_transformed = self.action_transformer.policy2env(minerl_action)\n return minerl_action_transformed\n def _env_action_to_agent(self, minerl_action_transformed, to_torch=False, check_if_null=False):\n \"\"\"\n Turn action from MineRL to model's action.\n Note that this will add batch dimensions to the action.\n Returns numpy arrays, unless `to_torch` is True, in which case it returns torch tensors.\n If `check_if_null` is True, check if the action is null (no action) after the initial\n transformation. This matches the behaviour done in OpenAI's VPT work.\n If action is null, return \"None\" instead"
+ },
+ {
+ "comment": "This code defines a class that takes MineRL observations as input and outputs the corresponding action. It includes methods for transforming actions, mapping actions, and getting an agent's action for a given observation. The action is returned with batch dimensions if necessary, and can be converted to PyTorch tensors if needed.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":175-198",
+ "content": " \"\"\"\n minerl_action = self.action_transformer.env2policy(minerl_action_transformed)\n if check_if_null:\n if np.all(minerl_action[\"buttons\"] == 0) and np.all(minerl_action[\"camera\"] == self.action_transformer.camera_zero_bin):\n return None\n # Add batch dims if not existant\n if minerl_action[\"camera\"].ndim == 1:\n minerl_action = {k: v[None] for k, v in minerl_action.items()}\n action = self.action_mapper.from_factored(minerl_action)\n if to_torch:\n action = {k: th.from_numpy(v).to(self.device) for k, v in action.items()}\n return action\n def get_action(self, minerl_obs):\n \"\"\"\n Get agent's action for given MineRL observation.\n Agent's hidden state is tracked internally. To reset it,\n call `reset()`.\n \"\"\"\n agent_input = self._env_obs_to_agent(minerl_obs)\n # The \"first\" argument could be used to reset tell episode\n # boundaries, but we are only using this for predicting (for now),"
+ },
+ {
+ "comment": "This code selects an action from the agent's policy and returns it after converting to environment format.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/agent.py\":199-205",
+ "content": " # so we do not hassle with it yet.\n agent_action, self.hidden_state, _ = self.policy.act(\n agent_input, self._dummy_first, self.hidden_state,\n stochastic=True\n )\n minerl_action = self._agent_action_to_env(agent_action)\n return minerl_action"
+ }
+ ]
+}
\ No newline at end of file
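The MineRLAgent interface documented above is typically driven by a simple rollout loop: unpack the pickled agent parameters, construct the agent, load weights, and call get_action per observation. The sketch below follows the parameter-unpacking pattern shown in these docs for run_inverse_dynamics_model.py; the environment id and file paths are placeholders, and MineRL-style gym registration is assumed.

```python
import pickle

import gym
import minerl  # noqa: F401  (assumed to register the MineRL environments)
from agent import MineRLAgent

# Placeholder environment id and file paths.
env = gym.make("MineRLBasaltFindCave-v0")
agent_parameters = pickle.load(open("foundation-model.model", "rb"))
policy_kwargs = agent_parameters["model"]["args"]["net"]["args"]
pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
pi_head_kwargs["temperature"] = float(pi_head_kwargs["temperature"])

agent = MineRLAgent(env, policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs)
agent.load_weights("foundation-model.weights")

obs = env.reset()
done = False
while not done:
    action = agent.get_action(obs)  # hidden state is tracked inside the agent
    obs, reward, done, info = env.step(action)
env.close()
```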
diff --git a/docs/doc/0a53cd4c-c664-4838-9098-c9716ebd56cd.json b/docs/doc/0a53cd4c-c664-4838-9098-c9716ebd56cd.json
new file mode 100644
index 0000000..f1340f5
--- /dev/null
+++ b/docs/doc/0a53cd4c-c664-4838-9098-c9716ebd56cd.json
@@ -0,0 +1,50 @@
+{
+ "summary": "The code imports libraries, initializes a data loader class for simpler code, lacks sub-sequence support, and processes data for a batch of samples with workers outputting all samples to the same batch. The `__del__` method terminates and joins processes when object is deleted.",
+ "details": [
+ {
+ "comment": "This code imports necessary libraries and defines functions for loading OpenAI MineRL VPT datasets, adjusting cursor position based on version-specific scalers, and compositing images with alpha transparency.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":0-39",
+ "content": "# Code for loading OpenAI MineRL VPT datasets\n# NOTE: This is NOT original code used for the VPT experiments!\n# (But contains all [or at least most] steps done in the original data loading)\nimport json\nimport glob\nimport os\nimport random\nfrom multiprocessing import Process, Queue, Event\nimport numpy as np\nimport cv2\nfrom run_inverse_dynamics_model import json_action_to_env_action\nfrom agent import resize_image, AGENT_RESOLUTION\nQUEUE_TIMEOUT = 10\nCURSOR_FILE = os.path.join(os.path.dirname(__file__), \"cursors\", \"mouse_cursor_white_16x16.png\")\nMINEREC_ORIGINAL_HEIGHT_PX = 720\n# If GUI is open, mouse dx/dy need also be adjusted with these scalers.\n# If data version is not present, assume it is 1.\nMINEREC_VERSION_SPECIFIC_SCALERS = {\n \"5.7\": 0.5,\n \"5.8\": 0.5,\n \"6.7\": 2.0,\n \"6.8\": 2.0,\n \"6.9\": 2.0,\n}\ndef composite_images_with_alpha(image1, image2, alpha, x, y):\n \"\"\"\n Draw image2 over image1 at location x,y, using alpha as the opacity for image2.\n Modifies image1 in-place\n \"\"\"\n ch = max(0, min(image1.shape[0] - y, image2.shape[0]))"
+ },
+ {
+ "comment": "The code reads a video and its corresponding JSON file to extract frames and annotations for each frame. It initializes a cursor image and alpha channel, then continuously processes tasks from the tasks queue. If a task is None, it breaks the loop. The code checks if the video contains the game starting with attack always down by noting that it might be stuck down until the player presses attack.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":40-66",
+ "content": " cw = max(0, min(image1.shape[1] - x, image2.shape[1]))\n if ch == 0 or cw == 0:\n return\n alpha = alpha[:ch, :cw]\n image1[y:y + ch, x:x + cw, :] = (image1[y:y + ch, x:x + cw, :] * (1 - alpha) + image2[:ch, :cw, :] * alpha).astype(np.uint8)\ndef data_loader_worker(tasks_queue, output_queue, quit_workers_event):\n \"\"\"\n Worker for the data loader.\n \"\"\"\n cursor_image = cv2.imread(CURSOR_FILE, cv2.IMREAD_UNCHANGED)\n # Assume 16x16\n cursor_image = cursor_image[:16, :16, :]\n cursor_alpha = cursor_image[:, :, 3:] / 255.0\n cursor_image = cursor_image[:, :, :3]\n while True:\n task = tasks_queue.get()\n if task is None:\n break\n trajectory_id, video_path, json_path = task\n video = cv2.VideoCapture(video_path)\n # NOTE: In some recordings, the game seems to start\n # with attack always down from the beginning, which\n # is stuck down until player actually presses attack\n # NOTE: It is uncertain if this was the issue with the original code."
+ },
+ {
+ "comment": "Checking if attack is stuck by monitoring scrollwheel actions and updating \"hotbar.#\" actions when hotbar selection changes.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":67-90",
+ "content": " attack_is_stuck = False\n # Scrollwheel is allowed way to change items, but this is\n # not captured by the recorder.\n # Work around this by keeping track of selected hotbar item\n # and updating \"hotbar.#\" actions when hotbar selection changes.\n # NOTE: It is uncertain is this was/is an issue with the contractor data\n last_hotbar = 0\n with open(json_path) as json_file:\n json_lines = json_file.readlines()\n json_data = \"[\" + \",\".join(json_lines) + \"]\"\n json_data = json.loads(json_data)\n for i in range(len(json_data)):\n if quit_workers_event.is_set():\n break\n step_data = json_data[i]\n if i == 0:\n # Check if attack will be stuck down\n if step_data[\"mouse\"][\"newButtons\"] == [0]:\n attack_is_stuck = True\n elif attack_is_stuck:\n # Check if we press attack down, then it might not be stuck\n if 0 in step_data[\"mouse\"][\"newButtons\"]:"
+ },
+ {
+ "comment": "Checking for stuck state and removing action, updating hotbar selection, reading frame even if null to progress forward.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":91-112",
+ "content": " attack_is_stuck = False\n # If still stuck, remove the action\n if attack_is_stuck:\n step_data[\"mouse\"][\"buttons\"] = [button for button in step_data[\"mouse\"][\"buttons\"] if button != 0]\n action, is_null_action = json_action_to_env_action(step_data)\n # Update hotbar selection\n current_hotbar = step_data[\"hotbar\"]\n if current_hotbar != last_hotbar:\n action[\"hotbar.{}\".format(current_hotbar + 1)] = 1\n last_hotbar = current_hotbar\n # Read frame even if this is null so we progress forward\n ret, frame = video.read()\n if ret:\n # Skip null actions as done in the VPT paper\n # NOTE: in VPT paper, this was checked _after_ transforming into agent's action-space.\n # We do this here as well to reduce amount of data sent over.\n if is_null_action:\n continue\n if step_data[\"isGuiOpen\"]:"
+ },
+ {
+ "comment": "Applies camera scaling factor to mouse coordinates, composes cursor image with frame, converts image color, clips and resizes the frame, then puts (trajectory_id, frame, action) in output queue. If frame cannot be read, prints an error message. Finally, releases video and checks quit_workers_event before putting None in output queue to signal end of data loading.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":113-133",
+ "content": " camera_scaling_factor = frame.shape[0] / MINEREC_ORIGINAL_HEIGHT_PX\n cursor_x = int(step_data[\"mouse\"][\"x\"] * camera_scaling_factor)\n cursor_y = int(step_data[\"mouse\"][\"y\"] * camera_scaling_factor)\n composite_images_with_alpha(frame, cursor_image, cursor_alpha, cursor_x, cursor_y)\n cv2.cvtColor(frame, code=cv2.COLOR_BGR2RGB, dst=frame)\n frame = np.asarray(np.clip(frame, 0, 255), dtype=np.uint8)\n frame = resize_image(frame, AGENT_RESOLUTION)\n output_queue.put((trajectory_id, frame, action), timeout=QUEUE_TIMEOUT)\n else:\n print(f\"Could not read frame from video {video_path}\")\n video.release()\n if quit_workers_event.is_set():\n break\n # Tell that we ended\n output_queue.put(None)\nclass DataLoader:\n \"\"\"\n Generator class for loading batches from a dataset\n This only returns a single step at a time per worker; no sub-sequences."
+ },
+ {
+ "comment": "This code initializes a data loader class that tracks the model's hidden state and feeds it along with one sample at a time. It supports simpler loader code, lower end hardware, but is not very efficient and lacks support for sub-sequences. The loader loads individual files as trajectory files if they are split into multiple files.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":134-154",
+ "content": " Idea is that you keep track of the model's hidden state and feed that in,\n along with one sample at a time.\n + Simpler loader code\n + Supports lower end hardware\n - Not very efficient (could be faster)\n - No support for sub-sequences\n - Loads up individual files as trajectory files (i.e. if a trajectory is split into multiple files,\n this code will load it up as a separate item).\n \"\"\"\n def __init__(self, dataset_dir, n_workers=8, batch_size=8, n_epochs=1, max_queue_size=16):\n assert n_workers >= batch_size, \"Number of workers must be equal or greater than batch size\"\n self.dataset_dir = dataset_dir\n self.n_workers = n_workers\n self.n_epochs = n_epochs\n self.batch_size = batch_size\n self.max_queue_size = max_queue_size\n unique_ids = glob.glob(os.path.join(dataset_dir, \"*.mp4\"))\n unique_ids = list(set([os.path.basename(x).split(\".\")[0] for x in unique_ids]))\n self.unique_ids = unique_ids\n # Create tuples of (video_path, json_path) for each unique_id"
+ },
+ {
+ "comment": "This code is creating a data loader for video demonstrations. It collects the video and JSONL file paths for each unique ID, shuffles them for each epoch, and adds them to the task queue. It also creates output queues for worker threads.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":155-177",
+ "content": " demonstration_tuples = []\n for unique_id in unique_ids:\n video_path = os.path.abspath(os.path.join(dataset_dir, unique_id + \".mp4\"))\n json_path = os.path.abspath(os.path.join(dataset_dir, unique_id + \".jsonl\"))\n demonstration_tuples.append((video_path, json_path))\n assert n_workers <= len(demonstration_tuples), f\"n_workers should be lower or equal than number of demonstrations {len(demonstration_tuples)}\"\n # Repeat dataset for n_epochs times, shuffling the order for\n # each epoch\n self.demonstration_tuples = []\n for i in range(n_epochs):\n random.shuffle(demonstration_tuples)\n self.demonstration_tuples += demonstration_tuples\n self.task_queue = Queue()\n self.n_steps_processed = 0\n for trajectory_id, task in enumerate(self.demonstration_tuples):\n self.task_queue.put((trajectory_id, *task))\n for _ in range(n_workers):\n self.task_queue.put(None)\n self.output_queues = [Queue(maxsize=max_queue_size) for _ in range(n_workers)]"
+ },
+ {
+ "comment": "This code sets up data loading workers as separate processes, and then starts them. The iterator function retrieves batch frames, actions, and episode IDs from the output queues of these worker processes until one of the workers runs out of work.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":178-208",
+ "content": " self.quit_workers_event = Event()\n self.processes = [\n Process(\n target=data_loader_worker,\n args=(\n self.task_queue,\n output_queue,\n self.quit_workers_event,\n ),\n daemon=True\n )\n for output_queue in self.output_queues\n ]\n for process in self.processes:\n process.start()\n def __iter__(self):\n return self\n def __next__(self):\n batch_frames = []\n batch_actions = []\n batch_episode_id = []\n for i in range(self.batch_size):\n workitem = self.output_queues[self.n_steps_processed % self.n_workers].get(timeout=QUEUE_TIMEOUT)\n if workitem is None:\n # Stop iteration when first worker runs out of work to do.\n # Yes, this has a chance of cutting out a lot of the work,\n # but this ensures batches will remain diverse, instead\n # of having bad ones in the end where potentially"
+ },
+ {
+ "comment": "This code is processing data for a batch of samples, where each worker outputs all samples to the same batch. It appends frames, actions, and episode IDs to their respective lists before returning them as a batch. The `__del__` method ensures all processes are terminated and joined when the object is deleted.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/data_loader.py\":209-221",
+ "content": " # one worker outputs all samples to the same batch.\n raise StopIteration()\n trajectory_id, frame, action = workitem\n batch_frames.append(frame)\n batch_actions.append(action)\n batch_episode_id.append(trajectory_id)\n self.n_steps_processed += 1\n return batch_frames, batch_actions, batch_episode_id\n def __del__(self):\n for process in self.processes:\n process.terminate()\n process.join()"
+ }
+ ]
+}
\ No newline at end of file
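A minimal sketch of consuming the DataLoader described above in a training-style loop. The dataset directory is a placeholder, and the per-episode hidden-state handling the docstring suggests is only indicated by a comment.

```python
from data_loader import DataLoader

# Placeholder dataset directory containing paired .mp4 and .jsonl recordings.
loader = DataLoader(dataset_dir="data/MakeWaterfall", n_workers=8, batch_size=8, n_epochs=1)

for batch_frames, batch_actions, batch_episode_ids in loader:
    for frame, action, episode_id in zip(batch_frames, batch_actions, batch_episode_ids):
        # A trainer would convert `action` into the agent's action space here and,
        # as the class docstring suggests, keep a hidden state per episode_id.
        pass
```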
diff --git a/docs/doc/16ac124a-b251-4a99-8683-e0d477357768.json b/docs/doc/16ac124a-b251-4a99-8683-e0d477357768.json
new file mode 100644
index 0000000..d76458b
--- /dev/null
+++ b/docs/doc/16ac124a-b251-4a99-8683-e0d477357768.json
@@ -0,0 +1,35 @@
+{
+ "summary": "The code includes Minecraft action classes with quantization schemes and an ActionTransformer, along with three functions for mapping item IDs to names and converting environment data to policy format.",
+ "details": [
+ {
+ "comment": "This code defines classes for various action types and a camera quantizer in the context of Minecraft gameplay. The Buttons class represents different action buttons like attack, jump, inventory, etc. SyntheticButtons includes composite/scripted actions. QuantizationScheme has options for linear or mu-law quantization. CameraQuantizer is responsible for discretizing and undiscretizing continuous camera input (pitch and yaw).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":0-53",
+ "content": "import attr\nimport minerl.herobraine.hero.mc as mc\nimport numpy as np\nfrom lib.minecraft_util import store_args\nclass Buttons:\n ATTACK = \"attack\"\n BACK = \"back\"\n FORWARD = \"forward\"\n JUMP = \"jump\"\n LEFT = \"left\"\n RIGHT = \"right\"\n SNEAK = \"sneak\"\n SPRINT = \"sprint\"\n USE = \"use\"\n DROP = \"drop\"\n INVENTORY = \"inventory\"\n ALL = [\n ATTACK,\n BACK,\n FORWARD,\n JUMP,\n LEFT,\n RIGHT,\n SNEAK,\n SPRINT,\n USE,\n DROP,\n INVENTORY,\n ] + [f\"hotbar.{i}\" for i in range(1, 10)]\nclass SyntheticButtons:\n # Composite / scripted actions\n CHANNEL_ATTACK = \"channel-attack\"\n ALL = [CHANNEL_ATTACK]\nclass QuantizationScheme:\n LINEAR = \"linear\"\n MU_LAW = \"mu_law\"\n@attr.s(auto_attribs=True)\nclass CameraQuantizer:\n \"\"\"\n A camera quantizer that discretizes and undiscretizes a continuous camera input with y (pitch) and x (yaw) components.\n Parameters:\n - camera_binsize: The size of the bins used for quantization. In case of mu-law quantization, it corresponds to the average binsize."
+ },
+ {
+ "comment": "This code defines two quantization schemes for camera actions: linear and mu-law. It also provides reference values for the mu parameter based on maxval and desired max_precision for mu-law encoding.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":54-67",
+ "content": " - camera_maxval: The maximum value of the camera action.\n - quantization_scheme: The quantization scheme to use. Currently, two quantization schemes are supported:\n - Linear quantization (default): Camera actions are split uniformly into discrete bins\n - Mu-law quantization: Transforms the camera action using mu-law encoding (https://en.wikipedia.org/wiki/%CE%9C-law_algorithm)\n followed by the same quantization scheme used by the linear scheme.\n - mu: Mu is the parameter that defines the curvature of the mu-law encoding. Higher values of\n mu will result in a sharper transition near zero. Below are some reference values listed\n for choosing mu given a constant maxval and a desired max_precision value.\n maxval = 10 | max_precision = 0.5 | \u03bc \u2248 2.93826\n maxval = 10 | max_precision = 0.4 | \u03bc \u2248 4.80939\n maxval = 10 | max_precision = 0.25 | \u03bc \u2248 11.4887\n maxval = 20 | max_precision = 0.5 | \u03bc \u2248 2.7\n maxval = 20 | max_precision = 0.4 | \u03bc \u2248 4.39768\n maxval = 20 | max_precision = 0.25 | \u03bc \u2248 10.3194"
+ },
+ {
+ "comment": "This code defines a class with properties for camera max value, bin size, quantization scheme (linear or Mu-Law), and mu value. The discretize method takes in xy coordinates, clips them within the camera range, applies the specified quantization scheme to discretize the values, and returns the rounded values as integers. The undiscretize method takes in discretized values and converts them back to their original continuous representation by multiplying with the bin size and subtracting the camera max value.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":68-94",
+ "content": " maxval = 40 | max_precision = 0.5 | \u03bc \u2248 2.60780\n maxval = 40 | max_precision = 0.4 | \u03bc \u2248 4.21554\n maxval = 40 | max_precision = 0.25 | \u03bc \u2248 9.81152\n \"\"\"\n camera_maxval: int\n camera_binsize: int\n quantization_scheme: str = attr.ib(\n default=QuantizationScheme.LINEAR,\n validator=attr.validators.in_([QuantizationScheme.LINEAR, QuantizationScheme.MU_LAW]),\n )\n mu: float = attr.ib(default=5)\n def discretize(self, xy):\n xy = np.clip(xy, -self.camera_maxval, self.camera_maxval)\n if self.quantization_scheme == QuantizationScheme.MU_LAW:\n xy = xy / self.camera_maxval\n v_encode = np.sign(xy) * (np.log(1.0 + self.mu * np.abs(xy)) / np.log(1.0 + self.mu))\n v_encode *= self.camera_maxval\n xy = v_encode\n # Quantize using linear scheme\n return np.round((xy + self.camera_maxval) / self.camera_binsize).astype(np.int64)\n def undiscretize(self, xy):\n xy = xy * self.camera_binsize - self.camera_maxval"
+ },
+ {
+ "comment": "This code defines a class called `ActionTransformer` that transforms actions between internal arrays and the MinerL environment format. It includes methods for discretizing and undiscretizing camera data, as well as calculating a zero bin value based on camera binsize. If the quantization scheme is set to \"mu_law\", it applies the mu-law quantization method to the input data.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":96-129",
+ "content": " if self.quantization_scheme == QuantizationScheme.MU_LAW:\n xy = xy / self.camera_maxval\n v_decode = np.sign(xy) * (1.0 / self.mu) * ((1.0 + self.mu) ** np.abs(xy) - 1.0)\n v_decode *= self.camera_maxval\n xy = v_decode\n return xy\nclass ActionTransformer:\n \"\"\"Transforms actions between internal array and minerl env format.\"\"\"\n @store_args\n def __init__(\n self,\n camera_maxval=10,\n camera_binsize=2,\n camera_quantization_scheme=\"linear\",\n camera_mu=5,\n ):\n self.quantizer = CameraQuantizer(\n camera_maxval=camera_maxval,\n camera_binsize=camera_binsize,\n quantization_scheme=camera_quantization_scheme,\n mu=camera_mu,\n )\n def camera_zero_bin(self):\n return self.camera_maxval // self.camera_binsize\n def discretize_camera(self, xy):\n return self.quantizer.discretize(xy)\n def undiscretize_camera(self, pq):\n return self.quantizer.undiscretize(pq)"
+ },
+ {
+ "comment": "The code contains three functions:\n\n1. item_embed_id_to_name(): This function converts an item ID to its name using the mc.MINERL_ITEM_MAP dictionary.\n2. dict_to_numpy(): This function transforms environment format data to policy output format, creating a dictionary \"act\" containing buttons and camera values in numpy array format. If human-spaces is False, it adds synthetic_buttons, place, equip, and craft values as well.\n3. numpy_to_dict(): This function converts numpy policy output to an environment-compatible format, ensuring the buttons shape matches the expected size.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":131-159",
+ "content": " def item_embed_id_to_name(self, item_id):\n return mc.MINERL_ITEM_MAP[item_id]\n def dict_to_numpy(self, acs):\n \"\"\"\n Env format to policy output format.\n \"\"\"\n act = {\n \"buttons\": np.stack([acs.get(k, 0) for k in Buttons.ALL], axis=-1),\n \"camera\": self.discretize_camera(acs[\"camera\"]),\n }\n if not self.human_spaces:\n act.update(\n {\n \"synthetic_buttons\": np.stack([acs[k] for k in SyntheticButtons.ALL], axis=-1),\n \"place\": self.item_embed_name_to_id(acs[\"place\"]),\n \"equip\": self.item_embed_name_to_id(acs[\"equip\"]),\n \"craft\": self.item_embed_name_to_id(acs[\"craft\"]),\n }\n )\n return act\n def numpy_to_dict(self, acs):\n \"\"\"\n Numpy policy output to env-compatible format.\n \"\"\"\n assert acs[\"buttons\"].shape[-1] == len(\n Buttons.ALL\n ), f\"Mismatched actions: {acs}; expected {len(Buttons.ALL)}:\\n( {Buttons.ALL})\""
+ },
+ {
+ "comment": "The code defines three methods: \"undiscretize_camera\", \"numpy_to_dict\", and \"discretize_camera\". It converts a camera array to its undiscretized form, converts numpy arrays to dictionaries, and converts an undiscretized camera array back into discretized form, respectively.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/actions.py\":160-177",
+ "content": " out = {name: acs[\"buttons\"][..., i] for (i, name) in enumerate(Buttons.ALL)}\n out[\"camera\"] = self.undiscretize_camera(acs[\"camera\"])\n return out\n def policy2env(self, acs):\n acs = self.numpy_to_dict(acs)\n return acs\n def env2policy(self, acs):\n nbatch = acs[\"camera\"].shape[0]\n dummy = np.zeros((nbatch,))\n out = {\n \"camera\": self.discretize_camera(acs[\"camera\"]),\n \"buttons\": np.stack([acs.get(k, dummy) for k in Buttons.ALL], axis=-1),\n }\n return out"
+ }
+ ]
+}
\ No newline at end of file
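The mu-law camera quantization described above first encodes a camera value as sign(x) * log(1 + mu * |x|) / log(1 + mu), rescales it back to the camera range, and then bins it linearly; undiscretization inverts both steps. Below is a standalone sketch of that round trip, not the repository's CameraQuantizer; the defaults maxval=10, binsize=2, mu=10 mirror the agent configuration documented earlier.

```python
import numpy as np

def mu_law_discretize(xy, maxval=10.0, binsize=2.0, mu=10.0):
    """Mu-law encode, then quantize linearly into integer bins (sketch)."""
    xy = np.clip(xy, -maxval, maxval) / maxval
    xy = np.sign(xy) * np.log1p(mu * np.abs(xy)) / np.log1p(mu) * maxval
    return np.round((xy + maxval) / binsize).astype(np.int64)

def mu_law_undiscretize(bins, maxval=10.0, binsize=2.0, mu=10.0):
    """Invert the linear binning, then the mu-law encoding (sketch)."""
    xy = (bins * binsize - maxval) / maxval
    return np.sign(xy) * ((1.0 + mu) ** np.abs(xy) - 1.0) / mu * maxval

camera = np.array([3.7, -0.4])
bins = mu_law_discretize(camera)
# Small camera movements keep more precision than large ones under mu-law.
print(bins, mu_law_undiscretize(bins))
```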
diff --git a/docs/doc/198b2885-52e2-44a7-8f1a-ce757f808760.json b/docs/doc/198b2885-52e2-44a7-8f1a-ce757f808760.json
new file mode 100644
index 0000000..a94d2c8
--- /dev/null
+++ b/docs/doc/198b2885-52e2-44a7-8f1a-ce757f808760.json
@@ -0,0 +1,60 @@
+{
+ "summary": "The code introduces an `ActionHead` abstract base class for reinforcement learning action heads, including methods such as logprob, sample, entropy, and kl_divergence. It supports Discrete, Real, and DictType action spaces and has reset parameters and forward pass functionality.",
+ "details": [
+ {
+ "comment": "This code defines an ActionHead class and a fan_in_linear function. ActionHead is an abstract base class for action heads, which are used in reinforcement learning to determine the optimal actions. The fan_in_linear function initializes the weights of the linear layer using the Fan-in initialization method.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":0-35",
+ "content": "import logging\nfrom typing import Any, Tuple\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.nn.init as init\nfrom gym3.types import DictType, Discrete, Real, TensorType, ValType\nLOG0 = -100\ndef fan_in_linear(module: nn.Module, scale=1.0, bias=True):\n \"\"\"Fan-in init\"\"\"\n module.weight.data *= scale / module.weight.norm(dim=1, p=2, keepdim=True)\n if bias:\n module.bias.data *= 0\nclass ActionHead(nn.Module):\n \"\"\"Abstract base class for action heads compatible with forc\"\"\"\n def forward(self, input_data: torch.Tensor) -> Any:\n \"\"\"\n Just a forward pass through this head\n :returns pd_params - parameters describing the probability distribution\n \"\"\"\n raise NotImplementedError\n def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"Logartithm of probability of sampling `action_sample` from a probability described by `pd_params`\"\"\"\n raise NotImplementedError\n def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:"
+ },
+ {
+ "comment": "This code defines an abstract base class `ActionHead` for entropy, sampling, and KL divergence calculation. It raises a NotImplementedError since subclasses should provide the actual implementation of these methods. The `DiagGaussianActionHead` class is also defined, which inherits from `ActionHead`, representing action heads with normally distributed uncorrelated variables based on network output mean and standard deviation parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":36-62",
+ "content": " \"\"\"Entropy of this distribution\"\"\"\n raise NotImplementedError\n def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> Any:\n \"\"\"\n Draw a sample from probability distribution given by those params\n :param pd_params Parameters of a probability distribution\n :param deterministic Whether to return a stochastic sample or deterministic mode of a distribution\n \"\"\"\n raise NotImplementedError\n def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:\n \"\"\"KL divergence between two distribution described by these two params\"\"\"\n raise NotImplementedError\nclass DiagGaussianActionHead(ActionHead):\n \"\"\"\n Action head where actions are normally distributed uncorrelated variables with specific means and variances.\n Means are calculated directly from the network while standard deviations are a parameter of this module\n \"\"\"\n LOG2PI = np.log(2.0 * np.pi)\n def __init__(self, input_dim: int, num_dimensions: int):"
+ },
+ {
+ "comment": "Initializes an action head with specified input and output dimensions, sets the linear layer's weight and bias using orthogonal initialization and assigns them to None respectively.\nDefines methods to reset parameters for the action head, forward propagates data through linear layer to obtain means, and calculates log probabilities of action samples given parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":63-88",
+ "content": " super().__init__()\n self.input_dim = input_dim\n self.num_dimensions = num_dimensions\n self.linear_layer = nn.Linear(input_dim, num_dimensions)\n self.log_std = nn.Parameter(torch.zeros(num_dimensions), requires_grad=True)\n def reset_parameters(self):\n init.orthogonal_(self.linear_layer.weight, gain=0.01)\n init.constant_(self.linear_layer.bias, 0.0)\n def forward(self, input_data: torch.Tensor, mask=None) -> torch.Tensor:\n assert not mask, \"Can not use a mask in a gaussian action head\"\n means = self.linear_layer(input_data)\n # Unsqueeze many times to get to the same shape\n logstd = self.log_std[(None,) * (len(means.shape) - 1)]\n mean_view, logstd = torch.broadcast_tensors(means, logstd)\n return torch.stack([mean_view, logstd], dim=-1)\n def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"Log-likelihood\"\"\"\n means = pd_params[..., 0]\n log_std = pd_params[..., 1]"
+ },
+ {
+ "comment": "Line 90: Calculate standard deviation from log_std\nLine 116: Calculate z-score for action sample\nLine 117: Return negative of sum of log probabilities\n\nComment for code: This code calculates the categorical distribution entropy, sample from a diagonal Gaussian distribution, and KL divergence for two sets of parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":90-118",
+ "content": " std = torch.exp(log_std)\n z_score = (action_sample - means) / std\n return -(0.5 * ((z_score ** 2 + self.LOG2PI).sum(dim=-1)) + log_std.sum(dim=-1))\n def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution entropy calculation - sum probs * log(probs).\n In case of diagonal gaussian distribution - 1/2 log(2 pi e sigma^2)\n \"\"\"\n log_std = pd_params[..., 1]\n return (log_std + 0.5 * (self.LOG2PI + 1)).sum(dim=-1)\n def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> torch.Tensor:\n means = pd_params[..., 0]\n log_std = pd_params[..., 1]\n if deterministic:\n return means\n else:\n return torch.randn_like(means) * torch.exp(log_std) + means\n def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution KL divergence calculation\n KL(Q || P) = sum Q_i log (Q_i / P_i)\n Formula is:"
+ },
+ {
+ "comment": "This code defines an ActionHead class with categorical actions. It initializes the action head with input_dim, num_actions, shape, builtin_linear_layer (optional), and temperature parameters. If builtin_linear_layer is True, it uses a linear layer for feature extraction. The output shape is determined by the input shape and number of actions.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":119-150",
+ "content": " log(sigma_p) - log(sigma_q) + (sigma_q^2 + (mu_q - mu_p)^2))/(2 * sigma_p^2)\n \"\"\"\n means_q = params_q[..., 0]\n log_std_q = params_q[..., 1]\n means_p = params_p[..., 0]\n log_std_p = params_p[..., 1]\n std_q = torch.exp(log_std_q)\n std_p = torch.exp(log_std_p)\n kl_div = log_std_p - log_std_q + (std_q ** 2 + (means_q - means_p) ** 2) / (2.0 * std_p ** 2) - 0.5\n return kl_div.sum(dim=-1, keepdim=True)\nclass CategoricalActionHead(ActionHead):\n \"\"\"Action head with categorical actions\"\"\"\n def __init__(\n self, input_dim: int, shape: Tuple[int], num_actions: int, builtin_linear_layer: bool = True, temperature: float = 1.0\n ):\n super().__init__()\n self.input_dim = input_dim\n self.num_actions = num_actions\n self.output_shape = shape + (num_actions,)\n self.temperature = temperature\n if builtin_linear_layer:\n self.linear_layer = nn.Linear(input_dim, np.prod(self.output_shape))\n else:"
+ },
+ {
+ "comment": "This code defines a class for an action head, which is responsible for outputting action probabilities from input data. It asserts that the input dimension matches the number of actions, and if a linear layer is not None, it initializes its parameters orthogonally with gain 0.01 and sets the bias to 0. The forward function computes the output by either passing the input through a linear layer or using the input directly, reshapes the result based on the output shape, scales the result by temperature, applies a mask if provided, and then returns the log softmax of the shaped output as float32.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":151-173",
+ "content": " assert (\n input_dim == num_actions\n ), f\"If input_dim ({input_dim}) != num_actions ({num_actions}), you need a linear layer to convert them.\"\n self.linear_layer = None\n def reset_parameters(self):\n if self.linear_layer is not None:\n init.orthogonal_(self.linear_layer.weight, gain=0.01)\n init.constant_(self.linear_layer.bias, 0.0)\n finit.fan_in_linear(self.linear_layer, scale=0.01)\n def forward(self, input_data: torch.Tensor, mask=None) -> Any:\n if self.linear_layer is not None:\n flat_out = self.linear_layer(input_data)\n else:\n flat_out = input_data\n shaped_out = flat_out.reshape(flat_out.shape[:-1] + self.output_shape)\n shaped_out /= self.temperature\n if mask is not None:\n shaped_out[~mask] = LOG0\n # Convert to float32 to avoid RuntimeError: \"log_softmax_lastdim_kernel_impl\" not implemented for 'Half'\n return F.log_softmax(shaped_out.float(), dim=-1)"
+ },
+ {
+ "comment": "The code contains three functions: `logprob`, `entropy`, and `sample`. \n- The `logprob` function calculates the log probability of a given set of actions against the provided logits. It returns the result in torch format.\n- The `entropy` function calculates the entropy of a categorical distribution from the given logits. It also returns the entropy in torch format.\n- The `sample` function generates a sample from the distribution represented by the given logits. If `deterministic` is set to True, it will always return the same value.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":175-195",
+ "content": " def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:\n value = actions.long().unsqueeze(-1)\n value, log_pmf = torch.broadcast_tensors(value, logits)\n value = value[..., :1]\n result = log_pmf.gather(-1, value).squeeze(-1)\n # result is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it.\n for _ in self.output_shape[:-1]:\n result = result.sum(dim=-1)\n return result\n def entropy(self, logits: torch.Tensor) -> torch.Tensor:\n \"\"\"Categorical distribution entropy calculation - sum probs * log(probs)\"\"\"\n probs = torch.exp(logits)\n entropy = -torch.sum(probs * logits, dim=-1)\n # entropy is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it.\n for _ in self.output_shape[:-1]:\n entropy = entropy.sum(dim=-1)\n return entropy\n def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:\n if deterministic:"
+ },
+ {
+ "comment": "\"Returns the index with maximum value in logits\"\n\"Applies Gumbel-Softmax trick for training with float16 precision\"\n\"Calculates KL divergence between two categorical distributions using logits\"",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":196-216",
+ "content": " return torch.argmax(logits, dim=-1)\n else:\n # Gumbel-Softmax trick.\n u = torch.rand_like(logits)\n # In float16, if you have around 2^{float_mantissa_bits} logits, sometimes you'll sample 1.0\n # Then the log(-log(1.0)) will give -inf when it should give +inf\n # This is a silly hack to get around that.\n # This hack does not skew the probability distribution, because this event can't possibly win the argmax.\n u[u == 1.0] = 0.999\n return torch.argmax(logits - torch.log(-torch.log(u)), dim=-1)\n def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:\n \"\"\"\n Categorical distribution KL divergence calculation\n KL(Q || P) = sum Q_i log (Q_i / P_i)\n When talking about logits this is:\n sum exp(Q_i) * (Q_i - P_i)\n \"\"\"\n kl = (torch.exp(logits_q) * (logits_q - logits_p)).sum(-1, keepdim=True)\n # kl is per-entry, still of size self.output_shape; we need to reduce of the rest of it."
+ },
+ {
+ "comment": "This code defines a DictActionHead class, which is an action head with multiple sub-actions. The class has methods to reset parameters and perform forward pass. During the forward pass, it takes input data and optional keyword arguments, and calls the forward method on each of its submodules using the provided keyword arguments. The results from all submodules are stored in a dictionary.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":217-242",
+ "content": " for _ in self.output_shape[:-1]:\n kl = kl.sum(dim=-2) # dim=-2 because we use keepdim=True above.\n return kl\nclass DictActionHead(nn.ModuleDict):\n \"\"\"Action head with multiple sub-actions\"\"\"\n def reset_parameters(self):\n for subhead in self.values():\n subhead.reset_parameters()\n def forward(self, input_data: torch.Tensor, **kwargs) -> Any:\n \"\"\"\n :param kwargs: each kwarg should be a dict with keys corresponding to self.keys()\n e.g. if this ModuleDict has submodules keyed by 'A', 'B', and 'C', we could call:\n forward(input_data, foo={'A': True, 'C': False}, bar={'A': 7}}\n Then children will be called with:\n A: forward(input_data, foo=True, bar=7)\n B: forward(input_data)\n C: forward(input_Data, foo=False)\n \"\"\"\n result = {}\n for head_name, subhead in self.items():\n head_kwargs = {\n kwarg_name: kwarg[head_name]"
+ },
+ {
+ "comment": "The code defines an action head class that contains sub-heads corresponding to the environment's action space. It supports logprob, sample, entropy, and kl_divergence methods on a batch of actions and logits. The make_action_head function creates an action head based on the given action space and output size of the policy network.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":243-263",
+ "content": " for kwarg_name, kwarg in kwargs.items()\n if kwarg is not None and head_name in kwarg\n }\n result[head_name] = subhead(input_data, **head_kwargs)\n return result\n def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:\n return sum(subhead.logprob(actions[k], logits[k]) for k, subhead in self.items())\n def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:\n return {k: subhead.sample(logits[k], deterministic) for k, subhead in self.items()}\n def entropy(self, logits: torch.Tensor) -> torch.Tensor:\n return sum(subhead.entropy(logits[k]) for k, subhead in self.items())\n def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:\n return sum(subhead.kl_divergence(logits_q[k], logits_p[k]) for k, subhead in self.items())\ndef make_action_head(ac_space: ValType, pi_out_size: int, temperature: float = 1.0):\n \"\"\"Helper function to create an action head corresponding to the environment action space\"\"\""
+ },
+ {
+ "comment": "Checks the type of action space and returns a corresponding ActionHead object. Supports Discrete, Real, and DictType action spaces. Non-1 temperature and nontrivial shape actions are not implemented yet.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_head.py\":264-274",
+ "content": " if isinstance(ac_space, TensorType):\n if isinstance(ac_space.eltype, Discrete):\n return CategoricalActionHead(pi_out_size, ac_space.shape, ac_space.eltype.n, temperature=temperature)\n elif isinstance(ac_space.eltype, Real):\n if temperature != 1.0:\n logging.warning(\"Non-1 temperature not implemented for DiagGaussianActionHead.\")\n assert len(ac_space.shape) == 1, \"Nontrivial shapes not yet implemented.\"\n return DiagGaussianActionHead(pi_out_size, ac_space.shape[0])\n elif isinstance(ac_space, DictType):\n return DictActionHead({k: make_action_head(v, pi_out_size, temperature) for k, v in ac_space.items()})\n raise NotImplementedError(f\"Action space of type {type(ac_space)} is not supported\")"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/1af74e6e-3d8d-4619-88ff-310619755426.json b/docs/doc/1af74e6e-3d8d-4619-88ff-310619755426.json
new file mode 100644
index 0000000..74e8f14
--- /dev/null
+++ b/docs/doc/1af74e6e-3d8d-4619-88ff-310619755426.json
@@ -0,0 +1,50 @@
+{
+ "summary": "The code defines neural network functions for data processing, including ResidualRecurrentBlocks and BatchNorm2d initialization, as well as MLPs, LSTM/RNN layers, Transformer blocks with recurrent forward pass. It also includes a function `get_norm` for normalization and another function `_banded_repeat`.",
+ "details": [
+ {
+ "comment": "This code defines a function `get_module_log_keys_recursive` that recursively collects all keys that a module and its children want to log. It also defines a class `FanInInitReLULayer` which implements a slightly modified initialization for ReLU layers, initializing the weights with standard deviation of 1. The class takes parameters such as number of input and output channels, layer type (linear, conv or conv3d), initialization scale, and whether to use batch normalization.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":0-29",
+ "content": "from typing import Dict, Optional\nimport torch as th\nfrom torch import nn\nfrom torch.nn import functional as F\nimport lib.torch_util as tu\nfrom lib.masked_attention import MaskedAttention\nfrom lib.minecraft_util import store_args\nfrom lib.tree_util import tree_map\ndef get_module_log_keys_recursive(m: nn.Module):\n \"\"\"Recursively get all keys that a module and its children want to log.\"\"\"\n keys = []\n if hasattr(m, \"get_log_keys\"):\n keys += m.get_log_keys()\n for c in m.children():\n keys += get_module_log_keys_recursive(c)\n return keys\nclass FanInInitReLULayer(nn.Module):\n \"\"\"Implements a slightly modified init that correctly produces std 1 outputs given ReLU activation\n :param inchan: number of input channels\n :param outchan: number of output channels\n :param layer_args: positional layer args\n :param layer_type: options are \"linear\" (dense layer), \"conv\" (2D Convolution), \"conv3d\" (3D convolution)\n :param init_scale: multiplier on initial weights\n :param batch_norm: use batch norm after the layer (for 2D data)"
+ },
+ {
+ "comment": "This code defines a function `__init__` which initializes an object. It takes various parameters like `inchan`, `outchan`, `layer_args`, `layer_type`, `init_scale`, `batch_norm`, `batch_norm_kwargs`, `group_norm_groups`, `layer_norm`, `use_activation`, and `log_scope`. It also takes keyword arguments like `**layer_kwargs`. The function sets the normalization type based on the values of these parameters. If `batch_norm` is True, it uses BatchNorm2d. If `group_norm_groups` is not None, it uses GroupNorm. And if `layer_norm` is True, it uses LayerNorm. It also sets the norm variable to None initially.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":30-61",
+ "content": " :param group_norm_groups: if not None, use group norm with this many groups after the layer. Group norm 1\n would be equivalent of layernorm for 2D data.\n :param layer_norm: use layernorm after the layer (for 1D data)\n :param layer_kwargs: keyword arguments for the layer\n \"\"\"\n @store_args\n def __init__(\n self,\n inchan: int,\n outchan: int,\n *layer_args,\n layer_type: str = \"conv\",\n init_scale: int = 1,\n batch_norm: bool = False,\n batch_norm_kwargs: Dict = {},\n group_norm_groups: Optional[int] = None,\n layer_norm: bool = False,\n use_activation=True,\n log_scope: Optional[str] = None,\n **layer_kwargs,\n ):\n super().__init__()\n # Normalization\n self.norm = None\n if batch_norm:\n self.norm = nn.BatchNorm2d(inchan, **batch_norm_kwargs)\n elif group_norm_groups is not None:\n self.norm = nn.GroupNorm(group_norm_groups, inchan)\n elif layer_norm:\n self.norm = nn.LayerNorm(inchan)"
+ },
+ {
+ "comment": "This code defines a util module with functions for initializing and forwarding data through neural networks. The ResidualRecurrentBlocks class is used to create residual recurrent blocks, which help in improving the stability of the network during training.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":63-93",
+ "content": " layer = dict(conv=nn.Conv2d, conv3d=nn.Conv3d, linear=nn.Linear)[layer_type]\n self.layer = layer(inchan, outchan, bias=self.norm is None, *layer_args, **layer_kwargs)\n # Init Weights (Fan-In)\n self.layer.weight.data *= init_scale / self.layer.weight.norm(\n dim=tuple(range(1, self.layer.weight.data.ndim)), p=2, keepdim=True\n )\n # Init Bias\n if self.layer.bias is not None:\n self.layer.bias.data *= 0\n def forward(self, x):\n \"\"\"Norm after the activation. Experimented with this for both IAM and BC and it was slightly better.\"\"\"\n if self.norm is not None:\n x = self.norm(x)\n x = self.layer(x)\n if self.use_activation:\n x = F.relu(x, inplace=True)\n return x\n def get_log_keys(self):\n return [\n f\"activation_mean/{self.log_scope}\",\n f\"activation_std/{self.log_scope}\",\n ]\nclass ResidualRecurrentBlocks(nn.Module):\n @store_args\n def __init__(\n self,"
+ },
+ {
+ "comment": "This code defines a class that initializes a list of ResidualRecurrentBlock instances, each with potentially different recurrence_type and block_kwargs. The forward method processes input through each block, while the initial_state method returns an initial state for the LSTM recurrence type based on batch size.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":94-125",
+ "content": " n_block=2,\n recurrence_type=\"multi_layer_lstm\",\n is_residual=True,\n **block_kwargs,\n ):\n super().__init__()\n init_scale = n_block ** -0.5 if is_residual else 1\n self.blocks = nn.ModuleList(\n [\n ResidualRecurrentBlock(\n **block_kwargs,\n recurrence_type=recurrence_type,\n is_residual=is_residual,\n init_scale=init_scale,\n block_number=i,\n )\n for i in range(n_block)\n ]\n )\n def forward(self, x, first, state):\n state_out = []\n assert len(state) == len(\n self.blocks\n ), f\"Length of state {len(state)} did not match length of blocks {len(self.blocks)}\"\n for block, _s_in in zip(self.blocks, state):\n x, _s_o = block(x, first, _s_in)\n state_out.append(_s_o)\n return x, state_out\n def initial_state(self, batchsize):\n if \"lstm\" in self.recurrence_type:"
+ },
+ {
+ "comment": "The code defines a ResidualRecurrentBlock class, which is a type of neural network module. It initializes the block with specified parameters like hidsize, timesteps, init_scale, recurrence_type, and more. If is_residual and use_pointwise_layer are True, the mlp0 layer is added to the block with specific size and initialization settings. The method returns an array of initial states for each block in the self.blocks list.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":126-160",
+ "content": " return [None for b in self.blocks]\n else:\n return [b.r.initial_state(batchsize) for b in self.blocks]\nclass ResidualRecurrentBlock(nn.Module):\n @store_args\n def __init__(\n self,\n hidsize,\n timesteps,\n init_scale=1,\n recurrence_type=\"multi_layer_lstm\",\n is_residual=True,\n use_pointwise_layer=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n attention_heads=8,\n attention_memory_size=2048,\n attention_mask_style=\"clipped_causal\",\n log_scope=\"resblock\",\n block_number=0,\n ):\n super().__init__()\n self.log_scope = f\"{log_scope}{block_number}\"\n s = init_scale\n if use_pointwise_layer:\n if is_residual:\n s *= 2 ** -0.5 # second residual\n self.mlp0 = FanInInitReLULayer(\n hidsize,\n hidsize * pointwise_ratio,\n init_scale=1,\n layer_type=\"linear\",\n layer_norm=True,"
+ },
+ {
+ "comment": "Creating a multi-layer perceptron (MLP) for pointwise features and layer normalization for pre-training.\n\nInitializing the LSTM or Transformer recurrent layer if specified, using normal distribution with scale 's'.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":161-183",
+ "content": " log_scope=self.log_scope + \"/ptwise_mlp0\",\n )\n self.mlp1 = FanInInitReLULayer(\n hidsize * pointwise_ratio,\n hidsize,\n init_scale=s,\n layer_type=\"linear\",\n use_activation=pointwise_use_activation,\n log_scope=self.log_scope + \"/ptwise_mlp1\",\n )\n self.pre_r_ln = nn.LayerNorm(hidsize)\n if recurrence_type in [\"multi_layer_lstm\", \"multi_layer_bilstm\"]:\n self.r = nn.LSTM(hidsize, hidsize, batch_first=True)\n nn.init.normal_(self.r.weight_hh_l0, std=s * (self.r.weight_hh_l0.shape[0] ** -0.5))\n nn.init.normal_(self.r.weight_ih_l0, std=s * (self.r.weight_ih_l0.shape[0] ** -0.5))\n self.r.bias_hh_l0.data *= 0\n self.r.bias_ih_l0.data *= 0\n elif recurrence_type == \"transformer\":\n self.r = MaskedAttention(\n input_size=hidsize,\n timesteps=timesteps,\n memory_size=attention_memory_size,"
+ },
+ {
+ "comment": "This function defines a recurrent forward pass for a Transformer block. It applies linear layers, LSTM/RNN, and optionally an MLP layer to input `x`. The result is returned along with the updated state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":184-215",
+ "content": " heads=attention_heads,\n init_scale=s,\n norm=\"none\",\n log_scope=log_scope + \"/sa\",\n use_muP_factor=True,\n mask=attention_mask_style,\n )\n def forward(self, x, first, state):\n residual = x\n x = self.pre_r_ln(x)\n x, state_out = recurrent_forward(\n self.r,\n x,\n first,\n state,\n reverse_lstm=self.recurrence_type == \"multi_layer_bilstm\" and (self.block_number + 1) % 2 == 0,\n )\n if self.is_residual and \"lstm\" in self.recurrence_type: # Transformer already residual.\n x = x + residual\n if self.use_pointwise_layer:\n # Residual MLP\n residual = x\n x = self.mlp1(self.mlp0(x))\n if self.is_residual:\n x = x + residual\n return x, state_out\ndef recurrent_forward(module, x, first, state, reverse_lstm=False):\n if isinstance(module, nn.LSTM):\n if state is not None:"
+ },
+ {
+ "comment": "This code is initializing a state for a recurrent model and passing input through the model. If reverse_lstm is True, it flips the input and output. The _banded_repeat function repeats an input sequence with a shift and the bandify function converts data from basis functions to a new shape.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":216-252",
+ "content": " # In case recurrent models do not accept a \"first\" argument we zero out the hidden state here\n mask = 1 - first[:, 0, None, None].to(th.float)\n state = tree_map(lambda _s: _s * mask, state)\n state = tree_map(lambda _s: _s.transpose(0, 1), state) # NL, B, H\n if reverse_lstm:\n x = th.flip(x, [1])\n x, state_out = module(x, state)\n if reverse_lstm:\n x = th.flip(x, [1])\n state_out = tree_map(lambda _s: _s.transpose(0, 1), state_out) # B, NL, H\n return x, state_out\n else:\n return module(x, first, state)\ndef _banded_repeat(x, t):\n \"\"\"\n Repeats x with a shift.\n For example (ignoring the batch dimension):\n _banded_repeat([A B C D E], 4)\n =\n [D E 0 0 0]\n [C D E 0 0]\n [B C D E 0]\n [A B C D E]\n \"\"\"\n b, T = x.shape\n x = th.cat([x, x.new_zeros(b, t - 1)], dim=1)\n result = x.unfold(1, T, 1).flip(1)\n return result\ndef bandify(b_nd, t, T):\n \"\"\"\n b_nd -> D_ntT, where\n \"n\" indexes over basis functions"
+ },
+ {
+ "comment": "This code defines a function `get_norm` for normalization, and another function (not shown) called `_banded_repeat`. The `B_ntT` shape is being assigned based on the `b_nd` shape and a time index `T`. If `bandsize >= T`, it assigns `b_nT` as `b_nd[:, -T:]`. Otherwise, it concatenates `b_nd.new_zeros(nbasis, T - bandsize)` and `b_nd` along dimension 1 to form `b_nT`. The function then returns the result of `_banded_repeat(b_nT, t)`.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/util.py\":253-275",
+ "content": " \"d\" indexes over time differences\n \"t\" indexes over output time\n \"T\" indexes over input time\n only t >= T is nonzero\n B_ntT[n, t, T] = b_nd[n, t - T]\n \"\"\"\n nbasis, bandsize = b_nd.shape\n b_nd = b_nd[:, th.arange(bandsize - 1, -1, -1)]\n if bandsize >= T:\n b_nT = b_nd[:, -T:]\n else:\n b_nT = th.cat([b_nd.new_zeros(nbasis, T - bandsize), b_nd], dim=1)\n D_tnT = _banded_repeat(b_nT, t)\n return D_tnT\ndef get_norm(name, d, dtype=th.float32):\n if name == \"none\":\n return lambda x: x\n elif name == \"layer\":\n return tu.LayerNorm(d, dtype=dtype)\n else:\n raise NotImplementedError(name)"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/23f7199d-4253-4b37-976f-09d45221301e.json b/docs/doc/23f7199d-4253-4b37-976f-09d45221301e.json
new file mode 100644
index 0000000..8572a62
--- /dev/null
+++ b/docs/doc/23f7199d-4253-4b37-976f-09d45221301e.json
@@ -0,0 +1,45 @@
+{
+ "summary": "Both comments discuss data processing tasks, with Comment A focusing on calculating and dividing products in a list 'x', while Comment B describes a function for reshaping input data, considering exceptions and undo functions, and utilizing a 'known' dictionary for shape inference.",
+ "details": [
+ {
+ "comment": "The code contains several functions related to data manipulation, such as calculating the product of a sequence of integers (`intprod`), checking and zipping lengths of sequences (`safezip`), transposing data with given before and after specifications (`transpose`), and undoing a data transposition (`transpose_undo`).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":0-42",
+ "content": "import numpy as np\nimport torch as th\ndef intprod(xs):\n \"\"\"\n Product of a sequence of integers\n \"\"\"\n out = 1\n for x in xs:\n out *= x\n return out\ndef safezip(*args):\n \"\"\"\n Check that lengths of sequences are the same, then zip them\n \"\"\"\n args = [list(a) for a in args]\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, f\"length mismatch: {list(map(len, args))}\"\n return list(zip(*args))\ndef transpose(x, before, after):\n \"\"\"\n Usage: x_bca = transpose(x_abc, 'abc', 'bca')\n \"\"\"\n assert sorted(before) == sorted(after), f\"cannot transpose {before} to {after}\"\n assert x.ndim == len(\n before\n ), f\"before spec '{before}' has length {len(before)} but x has {x.ndim} dimensions: {tuple(x.shape)}\"\n return x.permute(tuple(before.index(i) for i in after))\ndef transpose_undo(x, before, after, *, undo=None):\n \"\"\"\n Usage:\n x_bca, undo = transpose_undo(x_abc, 'abc', 'bca')\n x_bca = fully_connected_layer(x_bca)\n x_abc = undo(x_bca)\n \"\"\""
+ },
+ {
+ "comment": "Function `transpose` takes an input tensor, and a list of tuples specifying the axes to permute. It returns the transposed tensor and a function that undoes the transpose operation.\nFunction `compose_undo` combines two transformation functions into a single one that applies them in reverse order. If either is None, it simply returns the other. Otherwise, it creates an anonymous function that first applies the second transformation, then the first, and finally returns the result.\nString `NO_BIND` is used as a placeholder when a dimension cannot be bound to a specific variable.\nFunction `_parse_reshape_str` parses a string of the form \"x,*y,?z\" where x, y, and z are integers or '?' symbols. It returns a list containing three lists: the first contains '?' characters for 'before', '_' characters for 'after', and actual numbers for 'none'. The second contains actual numbers for 'before', and the third contains actual numbers for 'after'.\nFunction `_infer_part` infers the part of the tensor shape to be used based on the type of the input. If it is an integer, it returns that integer. Otherwise, it processes a list of terms, handling integers and strings containing '*' symbols differently.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":43-88",
+ "content": " return (\n transpose(x, before, after),\n compose_undo(undo, lambda x: transpose(x, before=after, after=before)),\n )\ndef compose_undo(u1, u2):\n assert u2 is not None\n if u1 is None:\n return u2\n def u(x):\n x = u2(x)\n x = u1(x)\n return x\n return u\nNO_BIND = \"__nobind\"\ndef _parse_reshape_str(s, kind):\n assert kind in (\"before\", \"after\")\n result = []\n n_underscores = 0\n for i, part in enumerate(s.split(\",\")):\n part = part.strip()\n if part == \"?\" and kind == \"before\":\n result.append([f\"__{i}\"])\n elif part == \"_\":\n result.append([f\"{NO_BIND}_{n_underscores}\"])\n n_underscores += 1\n else:\n result.append([term.strip() for term in part.split(\"*\")])\n return result\ndef _infer_part(part, concrete_dim, known, index, full_shape):\n if type(part) is int:\n return part\n assert isinstance(part, list), part\n lits = []\n syms = []\n for term in part:\n if type(term) is int:"
+ },
+ {
+ "comment": "This function takes a term, checks if it's an int or str, and performs calculations based on the input type. If int, it multiplies all literals (int or float) and returns the result. If str, it checks if there's only one symbol and concrete_dim is given. It asserts that concrete_dim is divisible by int_part and calculates v. If the symbol is already in known values, it asserts the known value matches. If not, it adds the symbol to known with its calculated value. Finally, if there are multiple symbols, it iterates through them and converts strings to ints.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":89-117",
+ "content": " lits.append(term)\n elif type(term) is str:\n syms.append(term)\n else:\n raise TypeError(f\"got {type(term)} but expected int or str\")\n int_part = 1\n for x in lits:\n int_part *= x\n if len(syms) == 0:\n return int_part\n elif len(syms) == 1 and concrete_dim is not None:\n assert concrete_dim % int_part == 0, f\"{concrete_dim} % {int_part} != 0 (at index {index}, full shape is {full_shape})\"\n v = concrete_dim // int_part\n if syms[0] in known:\n assert (\n known[syms[0]] == v\n ), f\"known value for {syms[0]} is {known[syms[0]]} but found value {v} at index {index} (full shape is {full_shape})\"\n else:\n known[syms[0]] = v\n return concrete_dim\n else:\n for i in range(len(syms)):\n if syms[i] in known:\n syms[i] = known[syms[i]]\n else:\n try:\n syms[i] = int(syms[i])\n except ValueError:\n pass"
+ },
+ {
+ "comment": "This function takes an existing list `lits` and a symbol `syms` and returns a new list where the `syms` occur after all elements in `lits`. The `_infer_step()` function takes known values, description, and shape as arguments. It creates copies of the new known and description lists and loops through each element in the description list. If a specific shape is provided, it assigns the corresponding dimension to `concrete_dim`. Then, it calls `_infer_part()` with the part, concrete dimension, known values, index, and full shape as arguments. The function returns the new known values, description list, and shape. The `fixed_point()` function uses a lambda function to check for equality between two inputs. It continues to apply the given function to the input until it reaches a fixed point where the input remains unchanged. Lastly, `_infer_question_mark()` function tries to find the index of \"?\" in the list and returns the list if found.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":118-156",
+ "content": " return lits + syms\ndef _infer_step(args):\n known, desc, shape = args\n new_known = known.copy()\n new_desc = desc.copy()\n for i in range(len(desc)):\n if shape is None:\n concrete_dim = None\n else:\n concrete_dim = shape[i]\n new_desc[i] = _infer_part(part=desc[i], concrete_dim=concrete_dim, known=new_known, index=i, full_shape=shape)\n return new_known, new_desc, shape\ndef _infer(known, desc, shape):\n if shape is not None:\n assert len(desc) == len(shape), f\"desc has length {len(desc)} but shape has length {len(shape)} (shape={shape})\"\n known, desc, shape = fixed_point(_infer_step, (known, desc, shape))\n return desc, known\ndef fixed_point(f, x, eq=None):\n if eq is None:\n eq = lambda a, b: a == b\n while True:\n new_x = f(x)\n if eq(x, new_x):\n return x\n else:\n x = new_x\ndef _infer_question_mark(x, total_product):\n try:\n question_mark_index = x.index([\"?\"])\n except ValueError:\n return x"
+ },
+ {
+ "comment": "- Calculate product of known values in list 'x'\n- Assert that the total product is divisible by observed product and return error message if not\n- Update list 'x' with calculated value for the question mark index and return it",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":157-186",
+ "content": " observed_product = 1\n for i in range(len(x)):\n if i != question_mark_index:\n assert type(x[i]) is int, f\"when there is a question mark, there can be no other unknown values (full list: {x})\"\n observed_product *= x[i]\n assert (\n observed_product and total_product % observed_product == 0\n ), f\"{total_product} is not divisible by {observed_product}\"\n value = total_product // observed_product\n x = x.copy()\n x[question_mark_index] = value\n return x\ndef _ground(x, known, infer_question_mark_with=None):\n x, known = _infer(known=known, desc=x, shape=None)\n if infer_question_mark_with:\n x = _infer_question_mark(x, infer_question_mark_with)\n for part in x:\n assert type(part) is int, f\"cannot infer value of {part}\"\n return x\ndef _handle_ellipsis(x, before, after):\n ell = [\"...\"]\n try:\n i = before.index(ell)\n l = len(x.shape) - len(before) + 1\n ellipsis_value = x.shape[i : i + l]\n ellipsis_value = list(ellipsis_value)"
+ },
+ {
+ "comment": "The code performs shape reshaping operations and handles any exceptions that may occur during the process. It takes input, initial 'before' and 'after' shapes as arguments, and optional 'undo' and 'known' parameters. If 'known' is provided, it becomes a dictionary of known values to help with shape inference. The function returns reshaped input and an 'undo' function for reverting the reshape operation.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":187-222",
+ "content": " before = before[:i] + ellipsis_value + before[i + 1 :]\n except ValueError:\n pass\n try:\n i = after.index(ell)\n after = after[:i] + ellipsis_value + after[i + 1 :]\n except ValueError:\n pass\n except UnboundLocalError as e:\n raise ValueError(\"there cannot be an ellipsis in 'after' unless there is an ellipsis in 'before'\") from e\n return before, after\ndef reshape_undo(inp, before, after, *, undo=None, known=None, **kwargs):\n \"\"\"\n Usage:\n x_Bhwse, undo = reshape_undo(\n x_bthwe,\n 'b, t, ..., stride*e',\n 'b*t, ..., stride, e',\n stride=7\n )\n x_Bhwse = do_some_stuff(x_Bhwse)\n x_bthwe = undo(x_Bhwse)\n It's necessary to pass known values as keywords only\n when they can't be inferred from the shape.\n (Eg. in the above example we needed to pass\n stride but not b, t, or e, since those can be determined from\n inp.shape once stride is known.)\n \"\"\"\n if known:\n known = {**kwargs, **known}\n else:\n known = kwargs"
+ },
+ {
+ "comment": "Ensures input types are correct and parses reshape string if input is a string. Infers the shape of the input, grounds it, and removes any bindings marked with NO_BIND. Asserts that the shapes match and returns the result.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":223-239",
+ "content": " assert type(before) is type(after), f\"{type(before)} != {type(after)}\"\n assert isinstance(inp, (th.Tensor, np.ndarray)), f\"require tensor or ndarray but got {type(inp)}\"\n assert isinstance(before, (str, list)), f\"require str or list but got {type(before)}\"\n if isinstance(before, str):\n before = _parse_reshape_str(before, \"before\")\n after = _parse_reshape_str(after, \"after\")\n before, after = _handle_ellipsis(inp, before, after)\n before_saved, after_saved = before, after\n before, known = _infer(known=known, desc=before, shape=inp.shape)\n before = _ground(before, known, product(inp.shape))\n after = _ground(after, known, product(inp.shape))\n known = {k: v for k, v in known.items() if not k.startswith(NO_BIND)}\n assert tuple(inp.shape) == tuple(before), f\"expected shape {before} but got shape {inp.shape}\"\n assert product(inp.shape) == product(\n after\n ), f\"cannot reshape {inp.shape} to {after} because the number of elements does not match\"\n return ("
+ },
+ {
+ "comment": "The code contains functions for reshaping arrays, calculating products of a list of numbers, and performing an exact division.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/misc.py\":240-262",
+ "content": " inp.reshape(after),\n compose_undo(undo, lambda inp: reshape(inp, after_saved, before_saved, known=known)),\n )\ndef reshape(*args, **kwargs):\n \"\"\"\n Please see the documenation for reshape_undo.\n \"\"\"\n x, _ = reshape_undo(*args, **kwargs)\n return x\ndef product(xs, one=1):\n result = one\n for x in xs:\n result = result * x\n return result\ndef exact_div(a, b):\n assert a % b == 0, f\"{a} is not divisible by {b}\"\n return a // b"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/4aa795e3-98ec-47f8-8182-b879751876fb.json b/docs/doc/4aa795e3-98ec-47f8-8182-b879751876fb.json
new file mode 100644
index 0000000..b977f5d
--- /dev/null
+++ b/docs/doc/4aa795e3-98ec-47f8-8182-b879751876fb.json
@@ -0,0 +1,15 @@
+{
+ "summary": "The code defines a ScaledMSEHead class for a linear output layer, scales targets to N(0, 1), and calculates MSE loss between normalized predictions and denormalized target values in a normalized space.",
+ "details": [
+ {
+ "comment": "This code defines a ScaledMSEHead class which is a linear output layer. It scales itself so that targets are always normalized to N(0, 1). The input size, output size, normalization type (ewma), and normalization kwargs can be set upon instantiation. The reset_parameters function initializes the weights with orthogonal initialization and resets the normalizer's parameters. Forward function passes input data through a linear layer.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/scaled_mse_head.py\":0-34",
+ "content": "from typing import Dict, Optional\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.nn.init as init\nfrom lib.action_head import fan_in_linear\nfrom lib.normalize_ewma import NormalizeEwma\nclass ScaledMSEHead(nn.Module):\n \"\"\"\n Linear output layer that scales itself so that targets are always normalized to N(0, 1)\n \"\"\"\n def __init__(\n self, input_size: int, output_size: int, norm_type: Optional[str] = \"ewma\", norm_kwargs: Optional[Dict] = None\n ):\n super().__init__()\n self.input_size = input_size\n self.output_size = output_size\n self.norm_type = norm_type\n self.linear = nn.Linear(self.input_size, self.output_size)\n norm_kwargs = {} if norm_kwargs is None else norm_kwargs\n self.normalizer = NormalizeEwma(output_size, **norm_kwargs)\n def reset_parameters(self):\n init.orthogonal_(self.linear.weight)\n fan_in_linear(self.linear)\n self.normalizer.reset_parameters()\n def forward(self, input_data):\n return self.linear(input_data)"
+ },
+ {
+ "comment": "This code defines a loss function for MSE (Mean Squared Error) and normalization/denormalization functions. It calculates the MSE loss between normalized prediction and denormalized target values in a normalized space, and converts input values from normalized to original space.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/scaled_mse_head.py\":36-49",
+ "content": " def loss(self, prediction, target):\n \"\"\"\n Calculate the MSE loss between output and a target.\n 'Prediction' has to be normalized while target is denormalized.\n Loss is calculated in a 'normalized' space.\n \"\"\"\n return F.mse_loss(prediction, self.normalizer(target), reduction=\"mean\")\n def denormalize(self, input_data):\n \"\"\"Convert input value from a normalized space into the original one\"\"\"\n return self.normalizer.denormalize(input_data)\n def normalize(self, input_data):\n return self.normalizer(input_data)"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/4ef7eda2-c6d2-49bc-adbf-3114c26ab5a4.json b/docs/doc/4ef7eda2-c6d2-49bc-adbf-3114c26ab5a4.json
new file mode 100644
index 0000000..4568b6f
--- /dev/null
+++ b/docs/doc/4ef7eda2-c6d2-49bc-adbf-3114c26ab5a4.json
@@ -0,0 +1,35 @@
+{
+ "summary": "The code imports necessary libraries, defines parameters, and creates an agent object for policy-based actor-critic model training in a behavioral cloning task. It trains the model using batches of data, updates weights, and reports average loss at specified intervals.",
+ "details": [
+ {
+ "comment": "This code imports necessary libraries and defines constants for basic behavioral cloning using gradient accumulation. It uses a smaller GPU, and it's not the original code used for VPT but serves to illustrate fine-tuning models with specific processing steps. The code specifies the number of epochs, batch size, number of workers, and device for training.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":0-33",
+ "content": "# Basic behavioural cloning\n# Note: this uses gradient accumulation in batches of ones\n# to perform training.\n# This will fit inside even smaller GPUs (tested on 8GB one),\n# but is slow.\n# NOTE: This is _not_ the original code used for VPT!\n# This is merely to illustrate how to fine-tune the models and includes\n# the processing steps used.\n# This will likely be much worse than what original VPT did:\n# we are not training on full sequences, but only one step at a time to save VRAM.\nfrom argparse import ArgumentParser\nimport pickle\nimport time\nimport gym\nimport minerl\nimport torch as th\nimport numpy as np\nfrom agent import PI_HEAD_KWARGS, MineRLAgent\nfrom data_loader import DataLoader\nfrom lib.tree_util import tree_map\nEPOCHS = 2\n# Needs to be <= number of videos\nBATCH_SIZE = 8\n# Ideally more than batch size to create\n# variation in datasets (otherwise, you will\n# get a bunch of consecutive samples)\n# Decrease this (and batch_size) if you run out of memory\nN_WORKERS = 12\nDEVICE = \"cuda\""
+ },
+ {
+ "comment": "Load model parameters from file, define environment settings, and create agent object with defined policy and head parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":35-59",
+ "content": "LOSS_REPORT_RATE = 100\nLEARNING_RATE = 0.000181\nWEIGHT_DECAY = 0.039428\nMAX_GRAD_NORM = 5.0\ndef load_model_parameters(path_to_model_file):\n agent_parameters = pickle.load(open(path_to_model_file, \"rb\"))\n policy_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n return policy_kwargs, pi_head_kwargs\ndef behavioural_cloning_train(data_dir, in_model, in_weights, out_weights):\n agent_policy_kwargs, agent_pi_head_kwargs = load_model_parameters(in_model)\n # To create model with the right environment.\n # All basalt environments have the same settings, so any of them works here\n env = gym.make(\"MineRLBasaltFindCave-v0\")\n agent = MineRLAgent(env, device=DEVICE, policy_kwargs=agent_policy_kwargs, pi_head_kwargs=agent_pi_head_kwargs)\n agent.load_weights(in_weights)\n env.close()\n policy = agent.policy\n trainable_parameters = policy.parameters()"
+ },
+ {
+ "comment": "Setting up optimizer, data loader, and initializing variables for training.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":61-90",
+ "content": " # Parameters taken from the OpenAI VPT paper\n optimizer = th.optim.Adam(\n trainable_parameters,\n lr=LEARNING_RATE,\n weight_decay=WEIGHT_DECAY\n )\n data_loader = DataLoader(\n dataset_dir=data_dir,\n n_workers=N_WORKERS,\n batch_size=BATCH_SIZE,\n n_epochs=EPOCHS\n )\n start_time = time.time()\n # Keep track of the hidden state per episode/trajectory.\n # DataLoader provides unique id for each episode, which will\n # be different even for the same trajectory when it is loaded\n # up again\n episode_hidden_states = {}\n dummy_first = th.from_numpy(np.array((False,))).to(DEVICE)\n loss_sum = 0\n for batch_i, (batch_images, batch_actions, batch_episode_id) in enumerate(data_loader):\n batch_loss = 0\n for image, action, episode_id in zip(batch_images, batch_actions, batch_episode_id):\n agent_action = agent._env_action_to_agent(action, to_torch=True, check_if_null=True)\n if agent_action is None:\n # Action was null"
+ },
+ {
+ "comment": "The code is setting up the environment for a policy-based actor-critic model in a behavioral cloning task. It assigns the hidden state for the episode, gets the output for the observation, calculates the log probability of the action, and updates the agent's state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":91-113",
+ "content": " continue\n agent_obs = agent._env_obs_to_agent({\"pov\": image})\n if episode_id not in episode_hidden_states:\n # TODO need to clean up this hidden state after worker is done with the work item.\n # Leaks memory, but not tooooo much at these scales (will be a problem later).\n episode_hidden_states[episode_id] = policy.initial_state(1)\n agent_state = episode_hidden_states[episode_id]\n pi_distribution, v_prediction, new_agent_state = policy.get_output_for_observation(\n agent_obs,\n agent_state,\n dummy_first\n )\n log_prob = policy.get_logprob_of_action(pi_distribution, agent_action)\n # Make sure we do not try to backprop through sequence\n # (fails with current accumulation)\n new_agent_state = tree_map(lambda x: x.detach(), new_agent_state)\n episode_hidden_states[episode_id] = new_agent_state\n # Finally, update the agent to increase the probability of the"
+ },
+ {
+ "comment": "The code is training a policy model using behavioral cloning on batches of data. It calculates the batch loss, applies gradients and updates weights, saves state dictionary to a specified output file, and reports the average loss every LOSS_REPORT_RATE batches. The inputs are the path to the directory containing recordings for training, the path to the model file to be fine-tuned, and the path to the weights file to be fine-tuned.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":114-138",
+ "content": " # taken action.\n # Remember to take mean over batch losses\n loss = -log_prob / BATCH_SIZE\n batch_loss += loss.item()\n loss.backward()\n th.nn.utils.clip_grad_norm_(trainable_parameters, MAX_GRAD_NORM)\n optimizer.step()\n optimizer.zero_grad()\n loss_sum += batch_loss\n if batch_i % LOSS_REPORT_RATE == 0:\n time_since_start = time.time() - start_time\n print(f\"Time: {time_since_start:.2f}, Batches: {batch_i}, Avrg loss: {loss_sum / LOSS_REPORT_RATE:.4f}\")\n loss_sum = 0\n state_dict = policy.state_dict()\n th.save(state_dict, out_weights)\nif __name__ == \"__main__\":\n parser = ArgumentParser()\n parser.add_argument(\"--data-dir\", type=str, required=True, help=\"Path to the directory containing recordings to be trained on\")\n parser.add_argument(\"--in-model\", required=True, type=str, help=\"Path to the .model file to be finetuned\")\n parser.add_argument(\"--in-weights\", required=True, type=str, help=\"Path to the .weights file to be finetuned\")"
+ },
+ {
+ "comment": "The code adds an argument for the output weights path and parses the command line arguments, then calls the behavioral cloning training function.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/behavioural_cloning.py\":139-142",
+ "content": " parser.add_argument(\"--out-weights\", required=True, type=str, help=\"Path where finetuned weights will be saved\")\n args = parser.parse_args()\n behavioural_cloning_train(args.data_dir, args.in_model, args.in_weights, args.out_weights)"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/53d0d63b-73f9-4375-aaf9-a688ebf98c23.json b/docs/doc/53d0d63b-73f9-4375-aaf9-a688ebf98c23.json
new file mode 100644
index 0000000..921938b
--- /dev/null
+++ b/docs/doc/53d0d63b-73f9-4375-aaf9-a688ebf98c23.json
@@ -0,0 +1,40 @@
+{
+ "summary": "The code initializes a game dictionary, defines model actions, manages camera resets, handles inputs, loads weights, captures video input, reads JSON data, and displays IDM predictions on a video stream with OpenCV functions.",
+ "details": [
+ {
+ "comment": "This code is initializing a dictionary mapping keyboard button names to their respective actions in the game. The code is used for controlling the character's movements and actions in the game environment using keyboard inputs.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":0-35",
+ "content": "# NOTE: this is _not_ the original code of IDM!\n# As such, while it is close and seems to function well,\n# its performance might be bit off from what is reported\n# in the paper.\nfrom argparse import ArgumentParser\nimport pickle\nimport cv2\nimport numpy as np\nimport json\nimport torch as th\nfrom agent import ENV_KWARGS\nfrom inverse_dynamics_model import IDMAgent\nKEYBOARD_BUTTON_MAPPING = {\n \"key.keyboard.escape\" :\"ESC\",\n \"key.keyboard.s\" :\"back\",\n \"key.keyboard.q\" :\"drop\",\n \"key.keyboard.w\" :\"forward\",\n \"key.keyboard.1\" :\"hotbar.1\",\n \"key.keyboard.2\" :\"hotbar.2\",\n \"key.keyboard.3\" :\"hotbar.3\",\n \"key.keyboard.4\" :\"hotbar.4\",\n \"key.keyboard.5\" :\"hotbar.5\",\n \"key.keyboard.6\" :\"hotbar.6\",\n \"key.keyboard.7\" :\"hotbar.7\",\n \"key.keyboard.8\" :\"hotbar.8\",\n \"key.keyboard.9\" :\"hotbar.9\",\n \"key.keyboard.e\" :\"inventory\",\n \"key.keyboard.space\" :\"jump\",\n \"key.keyboard.a\" :\"left\",\n \"key.keyboard.d\" :\"right\",\n \"key.keyboard.left.shift\" :\"sneak\",\n \"key.keyboard.left.control\" :\"sprint\","
+ },
+ {
+ "comment": "This code defines a set of actions that the model should predict for a given video. It also includes a template action and a message to be displayed with a cv2 window. The CAMERA_SCALER is used for mapping sensitivity from recorded Java code to the one used in the model. The json_action_to_env_action function converts a JSON action into a MineRL action.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":36-85",
+ "content": " \"key.keyboard.f\" :\"swapHands\",\n}\n# Template action\nNOOP_ACTION = {\n \"ESC\": 0,\n \"back\": 0,\n \"drop\": 0,\n \"forward\": 0,\n \"hotbar.1\": 0,\n \"hotbar.2\": 0,\n \"hotbar.3\": 0,\n \"hotbar.4\": 0,\n \"hotbar.5\": 0,\n \"hotbar.6\": 0,\n \"hotbar.7\": 0,\n \"hotbar.8\": 0,\n \"hotbar.9\": 0,\n \"inventory\": 0,\n \"jump\": 0,\n \"left\": 0,\n \"right\": 0,\n \"sneak\": 0,\n \"sprint\": 0,\n \"swapHands\": 0,\n \"camera\": np.array([0, 0]),\n \"attack\": 0,\n \"use\": 0,\n \"pickItem\": 0,\n}\nMESSAGE = \"\"\"\nThis script will take a video, predict actions for its frames and\nand show them with a cv2 window.\nPress any button the window to proceed to the next frame.\n\"\"\"\n# Matches a number in the MineRL Java code regarding sensitivity\n# This is for mapping from recorded sensitivity to the one used in the model\nCAMERA_SCALER = 360.0 / 2400.0\ndef json_action_to_env_action(json_action):\n \"\"\"\n Converts a json action into a MineRL action.\n Returns (minerl_action, is_null_action)\n \"\"\"\n # This might be slow...\n env_action = NOOP_ACTION.copy()"
+ },
+ {
+ "comment": "This code resets the camera action to avoid overriding other actions and handles keyboard and mouse inputs for the environment.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":86-111",
+ "content": " # As a safeguard, make camera action again so we do not override anything\n env_action[\"camera\"] = np.array([0, 0])\n is_null_action = True\n keyboard_keys = json_action[\"keyboard\"][\"keys\"]\n for key in keyboard_keys:\n # You can have keys that we do not use, so just skip them\n # NOTE in original training code, ESC was removed and replaced with\n # \"inventory\" action if GUI was open.\n # Not doing it here, as BASALT uses ESC to quit the game.\n if key in KEYBOARD_BUTTON_MAPPING:\n env_action[KEYBOARD_BUTTON_MAPPING[key]] = 1\n is_null_action = False\n mouse = json_action[\"mouse\"]\n camera_action = env_action[\"camera\"]\n camera_action[0] = mouse[\"dy\"] * CAMERA_SCALER\n camera_action[1] = mouse[\"dx\"] * CAMERA_SCALER\n if mouse[\"dx\"] != 0 or mouse[\"dy\"] != 0:\n is_null_action = False\n else:\n if abs(camera_action[0]) > 180:\n camera_action[0] = 0\n if abs(camera_action[1]) > 180:\n camera_action[1] = 0"
+ },
+ {
+ "comment": "This code handles mouse button events and initializes an inverse dynamics model agent for a game. It loads the agent's weights from a file, captures video input, and reads a JSON file containing game data.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":113-142",
+ "content": " mouse_buttons = mouse[\"buttons\"]\n if 0 in mouse_buttons:\n env_action[\"attack\"] = 1\n is_null_action = False\n if 1 in mouse_buttons:\n env_action[\"use\"] = 1\n is_null_action = False\n if 2 in mouse_buttons:\n env_action[\"pickItem\"] = 1\n is_null_action = False\n return env_action, is_null_action\ndef main(model, weights, video_path, json_path, n_batches, n_frames):\n print(MESSAGE)\n agent_parameters = pickle.load(open(model, \"rb\"))\n net_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n agent = IDMAgent(idm_net_kwargs=net_kwargs, pi_head_kwargs=pi_head_kwargs)\n agent.load_weights(weights)\n required_resolution = ENV_KWARGS[\"resolution\"]\n cap = cv2.VideoCapture(video_path)\n json_index = 0\n with open(json_path) as json_file:\n json_lines = json_file.readlines()\n json_data = \"[\" + \",\".join(json_lines) + \"]\""
+ },
+ {
+ "comment": "Loading and preprocessing video frames, converting actions from JSON to environment actions, predicting actions using the agent model, and displaying predictions on video frames.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":143-169",
+ "content": " json_data = json.loads(json_data)\n for _ in range(n_batches):\n th.cuda.empty_cache()\n print(\"=== Loading up frames ===\")\n frames = []\n recorded_actions = []\n for _ in range(n_frames):\n ret, frame = cap.read()\n if not ret:\n break\n assert frame.shape[0] == required_resolution[1] and frame.shape[1] == required_resolution[0], \"Video must be of resolution {}\".format(required_resolution)\n # BGR -> RGB\n frames.append(frame[..., ::-1])\n env_action, _ = json_action_to_env_action(json_data[json_index])\n recorded_actions.append(env_action)\n json_index += 1\n frames = np.stack(frames)\n print(\"=== Predicting actions ===\")\n predicted_actions = agent.predict_actions(frames)\n for i in range(n_frames):\n frame = frames[i]\n recorded_action = recorded_actions[i]\n cv2.putText(\n frame,\n f\"name: prediction (true)\","
+ },
+ {
+ "comment": "The code is displaying IDM model predictions on a video stream, with text labels for each action. It uses OpenCV's putText function to draw the labels on the frame and then displays the resulting image using cv2.imshow and waitKey functions. The code also takes arguments for weights and model files required to load the model.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":170-196",
+ "content": " (10, 10),\n cv2.FONT_HERSHEY_SIMPLEX,\n 0.4,\n (255, 255, 255),\n 1\n )\n for y, (action_name, action_array) in enumerate(predicted_actions.items()):\n current_prediction = action_array[0, i]\n cv2.putText(\n frame,\n f\"{action_name}: {current_prediction} ({recorded_action[action_name]})\",\n (10, 25 + y * 12),\n cv2.FONT_HERSHEY_SIMPLEX,\n 0.35,\n (255, 255, 255),\n 1\n )\n # RGB -> BGR again...\n cv2.imshow(\"MineRL IDM model predictions\", frame[..., ::-1])\n cv2.waitKey(0)\n cv2.destroyAllWindows()\nif __name__ == \"__main__\":\n parser = ArgumentParser(\"Run IDM on MineRL recordings.\")\n parser.add_argument(\"--weights\", type=str, required=True, help=\"Path to the '.weights' file to be loaded.\")\n parser.add_argument(\"--model\", type=str, required=True, help=\"Path to the '.model' file to be loaded.\")"
+ },
+ {
+ "comment": "This code sets up command line arguments for video path, JSONL file path, number of frames to process at a time, and the number of batches to process for visualization. It then parses these arguments into \"args\" and calls the main function with these arguments.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_inverse_dynamics_model.py\":197-204",
+ "content": " parser.add_argument(\"--video-path\", type=str, required=True, help=\"Path to a .mp4 file (Minecraft recording).\")\n parser.add_argument(\"--jsonl-path\", type=str, required=True, help=\"Path to a .jsonl file (Minecraft recording).\")\n parser.add_argument(\"--n-frames\", type=int, default=128, help=\"Number of frames to process at a time.\")\n parser.add_argument(\"--n-batches\", type=int, default=10, help=\"Number of batches (n-frames) to process for visualization.\")\n args = parser.parse_args()\n main(args.model, args.weights, args.video_path, args.jsonl_path, args.n_batches, args.n_frames)"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/828d206d-59f2-4b3a-a349-ae8a46170bbf.json b/docs/doc/828d206d-59f2-4b3a-a349-ae8a46170bbf.json
new file mode 100644
index 0000000..6b7d271
--- /dev/null
+++ b/docs/doc/828d206d-59f2-4b3a-a349-ae8a46170bbf.json
@@ -0,0 +1,20 @@
+{
+ "summary": "The NormalizeEwma module normalizes data across dimensions, calculates debiased mean and variance, and provides methods for normalization and denormalization. It maintains running mean and variance for input vectors during training while avoiding backpropagation issues.",
+ "details": [
+ {
+ "comment": "NormalizeEwma is an EWMA (Exponential Weighted Moving Average) normalization module for vectors of observations. It normalizes the data across specific dimensions, with optional per-element update and debiasing term.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/normalize_ewma.py\":0-27",
+ "content": "import numpy as np\nimport torch\nimport torch.nn as nn\nclass NormalizeEwma(nn.Module):\n \"\"\"Normalize a vector of observations - across the first norm_axes dimensions\"\"\"\n def __init__(self, input_shape, norm_axes=2, beta=0.99999, per_element_update=False, epsilon=1e-5):\n super().__init__()\n self.input_shape = input_shape\n self.norm_axes = norm_axes\n self.epsilon = epsilon\n self.beta = beta\n self.per_element_update = per_element_update\n self.running_mean = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)\n self.running_mean_sq = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)\n self.debiasing_term = nn.Parameter(torch.tensor(0.0, dtype=torch.float), requires_grad=False)\n def reset_parameters(self):\n self.running_mean.zero_()\n self.running_mean_sq.zero_()\n self.debiasing_term.zero_()\n def running_mean_var(self):\n debiased_mean = self.running_mean / self.debiasing_term.clamp(min=self.epsilon)"
+ },
+ {
+ "comment": "This code calculates the debiased mean and variance of input vectors for each batch while training. It normalizes the input to float32, updates running means and squared means with detached inputs, and applies weighted averages to avoid backpropagation through subsequent batches.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/normalize_ewma.py\":28-50",
+ "content": " debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp(min=self.epsilon)\n debiased_var = (debiased_mean_sq - debiased_mean ** 2).clamp(min=1e-2)\n return debiased_mean, debiased_var\n def forward(self, input_vector):\n # Make sure input is float32\n input_vector = input_vector.to(torch.float)\n if self.training:\n # Detach input before adding it to running means to avoid backpropping through it on\n # subsequent batches.\n detached_input = input_vector.detach()\n batch_mean = detached_input.mean(dim=tuple(range(self.norm_axes)))\n batch_sq_mean = (detached_input ** 2).mean(dim=tuple(range(self.norm_axes)))\n if self.per_element_update:\n batch_size = np.prod(detached_input.size()[: self.norm_axes])\n weight = self.beta ** batch_size\n else:\n weight = self.beta\n self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight))\n self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight))"
+ },
+ {
+ "comment": "This class provides methods to normalize and denormalize data. It also maintains running mean and variance.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/normalize_ewma.py\":51-59",
+ "content": " self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight))\n mean, var = self.running_mean_var()\n return (input_vector - mean[(None,) * self.norm_axes]) / torch.sqrt(var)[(None,) * self.norm_axes]\n def denormalize(self, input_vector):\n \"\"\"Transform normalized data back into original distribution\"\"\"\n mean, var = self.running_mean_var()\n return input_vector * torch.sqrt(var)[(None,) * self.norm_axes] + mean[(None,) * self.norm_axes]"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/88ad27ef-f62f-4656-8119-f1c927ea9e23.json b/docs/doc/88ad27ef-f62f-4656-8119-f1c927ea9e23.json
new file mode 100644
index 0000000..10990aa
--- /dev/null
+++ b/docs/doc/88ad27ef-f62f-4656-8119-f1c927ea9e23.json
@@ -0,0 +1,85 @@
+{
+ "summary": "The code includes classes for image preprocessing, reinforcement learning with optional parameters, and a MinecraftAgentPolicy network using PyTorch neural networks. It handles policy decisions, actions, and probabilities in the policy network while utilizing 3D convolution layers for reinforcement learning models.",
+ "details": [
+ {
+ "comment": "This code defines a class called \"ImgPreprocessing\" which is used to normalize incoming images. It has an optional parameter for img_statistics, a remote path to a npz file containing mean and std image values. If img_statistics is provided, the images are normalized using those values. Otherwise, if no img_statistics is provided but scale_img is True, the images are scaled by 1/255. The class inherits from nn.Module which allows it to be used as part of a neural network in PyTorch. The code also initializes an instance variable self.img_mean to None.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":0-31",
+ "content": "from copy import deepcopy\nfrom email import policy\nfrom typing import Dict, Optional\nimport numpy as np\nimport torch as th\nfrom gym3.types import DictType\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib.action_head import make_action_head\nfrom lib.action_mapping import CameraHierarchicalMapping\nfrom lib.impala_cnn import ImpalaCNN\nfrom lib.normalize_ewma import NormalizeEwma\nfrom lib.scaled_mse_head import ScaledMSEHead\nfrom lib.tree_util import tree_map\nfrom lib.util import FanInInitReLULayer, ResidualRecurrentBlocks\nfrom lib.misc import transpose\nclass ImgPreprocessing(nn.Module):\n \"\"\"Normalize incoming images.\n :param img_statistics: remote path to npz file with a mean and std image. If specified\n normalize images using this.\n :param scale_img: If true and img_statistics not specified, scale incoming images by 1/255.\n \"\"\"\n def __init__(self, img_statistics: Optional[str] = None, scale_img: bool = True):\n super().__init__()\n self.img_mean = None\n if img_statistics is not None:"
+ },
+ {
+ "comment": "This code defines a class named \"ImgObsProcess\" which is a subclass of nn.Module used for preprocessing images and observations. It loads image statistics (mean and std) from a file or uses default scale values based on the provided \"scale_img\". The forward method normalizes the input image by subtracting mean and dividing by std if img_mean and img_std are not None, otherwise it divides by ob_scale. The class also accepts parameters for creating an instance of ImpalaCNN followed by a linear layer.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":32-61",
+ "content": " img_statistics = dict(**np.load(img_statistics))\n self.img_mean = nn.Parameter(th.Tensor(img_statistics[\"mean\"]), requires_grad=False)\n self.img_std = nn.Parameter(th.Tensor(img_statistics[\"std\"]), requires_grad=False)\n else:\n self.ob_scale = 255.0 if scale_img else 1.0\n def forward(self, img):\n x = img.to(dtype=th.float32)\n if self.img_mean is not None:\n x = (x - self.img_mean) / self.img_std\n else:\n x = x / self.ob_scale\n return x\nclass ImgObsProcess(nn.Module):\n \"\"\"ImpalaCNN followed by a linear layer.\n :param cnn_outsize: impala output dimension\n :param output_size: output size of the linear layer.\n :param dense_init_norm_kwargs: kwargs for linear FanInInitReLULayer\n :param init_norm_kwargs: kwargs for 2d and 3d conv FanInInitReLULayer\n \"\"\"\n def __init__(\n self,\n cnn_outsize: int,\n output_size: int,\n dense_init_norm_kwargs: Dict = {},\n init_norm_kwargs: Dict = {},"
+ },
+ {
+ "comment": "This code defines a class called \"Policy\" with an initializer and a forward method. The initializer takes various parameters, creates an ImpalaCNN and FanInInitReLULayer layers, and initializes the CNN layer with given parameters. The forward method applies these layers to input images and returns the result.\nThe code also defines a class called \"MinecraftPolicy\" that extends nn.Module and takes recurrence_type as parameter. It doesn't have any methods defined.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":62-90",
+ "content": " **kwargs,\n ):\n super().__init__()\n self.cnn = ImpalaCNN(\n outsize=cnn_outsize,\n init_norm_kwargs=init_norm_kwargs,\n dense_init_norm_kwargs=dense_init_norm_kwargs,\n **kwargs,\n )\n self.linear = FanInInitReLULayer(\n cnn_outsize,\n output_size,\n layer_type=\"linear\",\n **dense_init_norm_kwargs,\n )\n def forward(self, img):\n return self.linear(self.cnn(img))\nclass MinecraftPolicy(nn.Module):\n \"\"\"\n :param recurrence_type:\n None - No recurrence, adds no extra layers\n lstm - (Depreciated). Singular LSTM\n multi_layer_lstm - Multi-layer LSTM. Uses n_recurrence_layers to determine number of consecututive LSTMs\n Does NOT support ragged batching\n multi_masked_lstm - Multi-layer LSTM that supports ragged batching via the first vector. This model is slower\n Uses n_recurrence_layers to determine number of consecututive LSTMs"
+ },
+ {
+ "comment": "This function is used to initialize an object of the class \"Policy\" which appears to be a deep learning model for reinforcement learning. The model can take both image and observation inputs, and uses a Dense transformer as part of its architecture. There are many optional parameters such as recurrence_type, impala_width, obs_processing_width, hidsize, single_output, img_shape, scale_input_img, only_img_input, init_norm_kwargs, impala_kwargs and more that can be used to customize the model.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":91-121",
+ "content": " transformer - Dense transformer\n :param init_norm_kwargs: kwargs for all FanInInitReLULayers.\n \"\"\"\n def __init__(\n self,\n recurrence_type=\"lstm\",\n impala_width=1,\n impala_chans=(16, 32, 32),\n obs_processing_width=256,\n hidsize=512,\n single_output=False, # True if we don't need separate outputs for action/value outputs\n img_shape=None,\n scale_input_img=True,\n only_img_input=False,\n init_norm_kwargs={},\n impala_kwargs={},\n # Unused argument assumed by forc.\n input_shape=None, # pylint: disable=unused-argument\n active_reward_monitors=None,\n img_statistics=None,\n first_conv_norm=False,\n diff_mlp_embedding=False,\n attention_mask_style=\"clipped_causal\",\n attention_heads=8,\n attention_memory_size=2048,\n use_pointwise_layer=True,\n pointwise_ratio=4,\n pointwise_use_activation=False,\n n_recurrence_layers=1,\n recurrence_is_residual=True,"
+ },
+ {
+ "comment": "The code defines a class with an __init__ method that takes various arguments, including the recurrence_type, active_reward_monitors, single_output, impala_width, impala_chans, hidsize, init_norm_kwargs and timesteps. It performs an assertion on the recurrence_type, initializes some variables and dictionaries, and defines a few more attributes based on these arguments.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":122-149",
+ "content": " timesteps=None,\n use_pre_lstm_ln=True, # Not needed for transformer\n **unused_kwargs,\n ):\n super().__init__()\n assert recurrence_type in [\n \"multi_layer_lstm\",\n \"multi_layer_bilstm\",\n \"multi_masked_lstm\",\n \"transformer\",\n \"none\",\n ]\n active_reward_monitors = active_reward_monitors or {}\n self.single_output = single_output\n chans = tuple(int(impala_width * c) for c in impala_chans)\n self.hidsize = hidsize\n # Dense init kwargs replaces batchnorm/groupnorm with layernorm\n self.init_norm_kwargs = init_norm_kwargs\n self.dense_init_norm_kwargs = deepcopy(init_norm_kwargs)\n if self.dense_init_norm_kwargs.get(\"group_norm_groups\", None) is not None:\n self.dense_init_norm_kwargs.pop(\"group_norm_groups\", None)\n self.dense_init_norm_kwargs[\"layer_norm\"] = True\n if self.dense_init_norm_kwargs.get(\"batch_norm\", False):\n self.dense_init_norm_kwargs.pop(\"batch_norm\", False)"
+ },
+ {
+ "comment": "Initializing layer norm for dense layers and setting up input processing components.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":150-177",
+ "content": " self.dense_init_norm_kwargs[\"layer_norm\"] = True\n # Setup inputs\n self.img_preprocess = ImgPreprocessing(img_statistics=img_statistics, scale_img=scale_input_img)\n self.img_process = ImgObsProcess(\n cnn_outsize=256,\n output_size=hidsize,\n inshape=img_shape,\n chans=chans,\n nblock=2,\n dense_init_norm_kwargs=self.dense_init_norm_kwargs,\n init_norm_kwargs=init_norm_kwargs,\n first_conv_norm=first_conv_norm,\n **impala_kwargs,\n )\n self.pre_lstm_ln = nn.LayerNorm(hidsize) if use_pre_lstm_ln else None\n self.diff_obs_process = None\n self.recurrence_type = recurrence_type\n self.recurrent_layer = None\n self.recurrent_layer = ResidualRecurrentBlocks(\n hidsize=hidsize,\n timesteps=timesteps,\n recurrence_type=recurrence_type,\n is_residual=recurrence_is_residual,\n use_pointwise_layer=use_pointwise_layer,"
+ },
+ {
+ "comment": "The code initializes a module with specified parameters including pointwise_ratio, pointwise_use_activation, attention_mask_style, attention_heads, attention_memory_size and n_block. Then it creates an instance of FanInInitReLULayer and LayerNorm for the last layer and final layer normalization respectively. It also defines a function output_latent_size to return the latent size, and another function forward which takes in observations, initial state, and context as input, performs image preprocessing and optional differential observation processing if specified, applies pre-LSTM normalization if present, then passes the processed data through the recurrent layer to obtain output x and updated state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":178-207",
+ "content": " pointwise_ratio=pointwise_ratio,\n pointwise_use_activation=pointwise_use_activation,\n attention_mask_style=attention_mask_style,\n attention_heads=attention_heads,\n attention_memory_size=attention_memory_size,\n n_block=n_recurrence_layers,\n )\n self.lastlayer = FanInInitReLULayer(hidsize, hidsize, layer_type=\"linear\", **self.dense_init_norm_kwargs)\n self.final_ln = th.nn.LayerNorm(hidsize)\n def output_latent_size(self):\n return self.hidsize\n def forward(self, ob, state_in, context):\n first = context[\"first\"]\n x = self.img_preprocess(ob[\"img\"])\n x = self.img_process(x)\n if self.diff_obs_process:\n processed_obs = self.diff_obs_process(ob[\"diff_goal\"])\n x = processed_obs + x\n if self.pre_lstm_ln is not None:\n x = self.pre_lstm_ln(x)\n if self.recurrent_layer is not None:\n x, state_out = self.recurrent_layer(x, first, state_in)\n else:"
+ },
+ {
+ "comment": "The code defines a class `MinecraftAgentPolicy` that inherits from `nn.Module`. It takes in an action space, policy kwargs, and pi_head kwargs as parameters during initialization. Inside the initialization, it creates a network `self.net` using `MinecraftPolicy`, a value head `self.value_head` using `make_value_head`, and a policy head `self.pi_head` using `make_action_head`. The code also defines a method `initial_state(batchsize)` that returns the initial state of the recurrent layer if it exists, otherwise it returns None.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":208-237",
+ "content": " state_out = state_in\n x = F.relu(x, inplace=False)\n x = self.lastlayer(x)\n x = self.final_ln(x)\n pi_latent = vf_latent = x\n if self.single_output:\n return pi_latent, state_out\n return (pi_latent, vf_latent), state_out\n def initial_state(self, batchsize):\n if self.recurrent_layer:\n return self.recurrent_layer.initial_state(batchsize)\n else:\n return None\nclass MinecraftAgentPolicy(nn.Module):\n def __init__(self, action_space, policy_kwargs, pi_head_kwargs):\n super().__init__()\n self.net = MinecraftPolicy(**policy_kwargs)\n self.action_space = action_space\n self.value_head = self.make_value_head(self.net.output_latent_size())\n self.pi_head = self.make_action_head(self.net.output_latent_size(), **pi_head_kwargs)\n def make_value_head(self, v_out_size: int, norm_type: str = \"ewma\", norm_kwargs: Optional[Dict] = None):\n return ScaledMSEHead(v_out_size, 1, norm_type=norm_type, norm_kwargs=norm_kwargs)"
+ },
+ {
+ "comment": "This code defines a class that uses a neural network to make policy decisions. It includes methods for creating an action head, initializing the state, resetting parameters, and performing forward passes on input observations. The forward pass involves passing the observation through the network, extracting policy logits and value predictions using separate heads, and returning these outputs along with any updated state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":239-268",
+ "content": " def make_action_head(self, pi_out_size: int, **pi_head_opts):\n return make_action_head(self.action_space, pi_out_size, **pi_head_opts)\n def initial_state(self, batch_size: int):\n return self.net.initial_state(batch_size)\n def reset_parameters(self):\n super().reset_parameters()\n self.net.reset_parameters()\n self.pi_head.reset_parameters()\n self.value_head.reset_parameters()\n def forward(self, obs, first: th.Tensor, state_in):\n if isinstance(obs, dict):\n # We don't want to mutate the obs input.\n obs = obs.copy()\n # If special \"mask\" key is in obs,\n # It's for masking the logits.\n # We take it out (the network doesn't need it)\n mask = obs.pop(\"mask\", None)\n else:\n mask = None\n (pi_h, v_h), state_out = self.net(obs, state_in, context={\"first\": first})\n pi_logits = self.pi_head(pi_h, mask=mask)\n vpred = self.value_head(v_h)\n return (pi_logits, vpred, None), state_out"
+ },
+ {
+ "comment": "This code defines three functions for handling actions and probabilities in a policy network. The first function `get_logprob_of_action` calculates the log probability of taking a given action based on the provided probability distribution. The second function `get_kl_of_action_dists` computes the KL divergence between two action probability distributions. Lastly, the `get_output_for_observation` function returns the probability distribution, value prediction, and new state for a given observation using the previous two functions.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":270-298",
+ "content": " def get_logprob_of_action(self, pd, action):\n \"\"\"\n Get logprob of taking action `action` given probability distribution\n (see `get_gradient_for_action` to get this distribution)\n \"\"\"\n ac = tree_map(lambda x: x.unsqueeze(1), action)\n log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n return log_prob[:, 0]\n def get_kl_of_action_dists(self, pd1, pd2):\n \"\"\"\n Get the KL divergence between two action probability distributions\n \"\"\"\n return self.pi_head.kl_divergence(pd1, pd2)\n def get_output_for_observation(self, obs, state_in, first):\n \"\"\"\n Return gradient-enabled outputs for given observation.\n Use `get_logprob_of_action` to get log probability of action\n with the given probability distribution.\n Returns:\n - probability distribution given observation\n - value prediction for given observation\n - new state\n \"\"\"\n # We need to add a fictitious time dimension everywhere"
+ },
+ {
+ "comment": "Code is adding a time dimension to the observations and first state, then passing them through the model to get policies (pd), value predictions (vpred), and update the state. If a taken action is provided, it uses that for the current step instead of sampling from the policy. It calculates the log probability of the taken action and stores the results in a dictionary with keys \"log_prob\" and \"vpred\". The time dimension is removed after calculations are done.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":299-322",
+ "content": " obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n return pd, self.value_head.denormalize(vpred)[:, 0], state_out\n @th.no_grad()\n def act(self, obs, first, state_in, stochastic: bool = True, taken_action=None, return_pd=False):\n # We need to add a fictitious time dimension everywhere\n obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n if taken_action is None:\n ac = self.pi_head.sample(pd, deterministic=not stochastic)\n else:\n ac = tree_map(lambda x: x.unsqueeze(1), taken_action)\n log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n # After unsqueezing, squeeze back to remove fictitious time dimension\n result = {\"log_prob\": log_prob[:, 0], \"vpred\": self.value_head.denormalize(vpred)[:, 0]}"
+ },
+ {
+ "comment": "This code defines a class called \"InverseActionNet\" that inherits from another class named \"MinecraftPolicy\". The class has an initializer which takes parameters for hidden size, 3D convolution parameters, and any other arguments passed to the parent class. It also contains two methods: \"policy\" and \"v\".\n\nThe \"policy\" method calculates the policy distribution (pd) and the value prediction (vpred) for a given observation (obs). It returns the pd, vpred, and an additional state_out. If return_pd is True, it also returns the first element of each vector in the pd array by using tree_map lambda function.\n\nThe \"v\" method predicts the value for a given MDP observation. It takes obs, first, and state_in as input parameters. After unsqueezing the obs and first variables, it calls the parent class's __call__ method to get pd, vpred, and state_out. Finally, it returns the denormalized vpred value of the first element in each vector by using self.value_head.denormalize function.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":323-356",
+ "content": " if return_pd:\n result[\"pd\"] = tree_map(lambda x: x[:, 0], pd)\n ac = tree_map(lambda x: x[:, 0], ac)\n return ac, state_out, result\n @th.no_grad()\n def v(self, obs, first, state_in):\n \"\"\"Predict value for a given mdp observation\"\"\"\n obs = tree_map(lambda x: x.unsqueeze(1), obs)\n first = first.unsqueeze(1)\n (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)\n # After unsqueezing, squeeze back\n return self.value_head.denormalize(vpred)[:, 0]\nclass InverseActionNet(MinecraftPolicy):\n \"\"\"\n Args:\n conv3d_params: PRE impala 3D CNN params. They are just passed into th.nn.Conv3D.\n \"\"\"\n def __init__(\n self,\n hidsize=512,\n conv3d_params=None,\n **MCPoliy_kwargs,\n ):\n super().__init__(\n hidsize=hidsize,\n # If we're using 3dconv, then we normalize entire impala otherwise don't\n # normalize the first impala layer since we normalize the input"
+ },
+ {
+ "comment": "This code initializes a 3D convolution layer if the `conv3d_params` is not None. It also sets the initialization parameters for the 3D conv layer differently to avoid normalization of its input. The forward function applies the 3D convolution (if available) before processing the image stack.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":357-385",
+ "content": " first_conv_norm=conv3d_params is not None,\n **MCPoliy_kwargs,\n )\n self.conv3d_layer = None\n if conv3d_params is not None:\n # 3D conv is the first layer, so don't normalize its input\n conv3d_init_params = deepcopy(self.init_norm_kwargs)\n conv3d_init_params[\"group_norm_groups\"] = None\n conv3d_init_params[\"batch_norm\"] = False\n self.conv3d_layer = FanInInitReLULayer(\n layer_type=\"conv3d\",\n log_scope=\"3d_conv\",\n **conv3d_params,\n **conv3d_init_params,\n )\n def forward(self, ob, state_in, context):\n first = context[\"first\"]\n x = self.img_preprocess(ob[\"img\"])\n # Conv3D Prior to Impala\n if self.conv3d_layer is not None:\n x = self._conv3d_forward(x)\n # Impala Stack\n x = self.img_process(x)\n if self.recurrent_layer is not None:\n x, state_out = self.recurrent_layer(x, first, state_in)"
+ },
+ {
+ "comment": "The code defines a class `InverseActionPolicy` that inherits from `nn.Module`. This class represents an inverse action policy for a reinforcement learning model. It consists of two components: a network (`self.net`) and a policy head (`self.pi_head`). The network is responsible for mapping observations to a latent space, while the policy head maps the latent representation to a distribution over actions.\n\nThe `__init__` method initializes the instance of the class by setting the action space, creating an instance of the `InverseActionNet`, and then creating the policy head based on the specified output size from the network and any additional keyword arguments provided.\n\nThe `_conv3d_forward` function is a helper function that performs 3D convolution on input data and returns the result. It transposes the input tensor, applies a series of 1D convolutions along different axes, and then transposes the resulting tensor back to the original format.\n\nThe `_policy_and_value` method calculates the policy and value for an input observation by passing it through the network and policy head, applying a ReLU activation function, and normalizing the output distribution. It also returns the current internal state of the module.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":387-423",
+ "content": " x = F.relu(x, inplace=False)\n pi_latent = self.lastlayer(x)\n pi_latent = self.final_ln(x)\n return (pi_latent, None), state_out\n def _conv3d_forward(self, x):\n # Convert from (B, T, H, W, C) -> (B, H, W, C, T)\n x = transpose(x, \"bthwc\", \"bcthw\")\n new_x = []\n for mini_batch in th.split(x, 1):\n new_x.append(self.conv3d_layer(mini_batch))\n x = th.cat(new_x)\n # Convert back\n x = transpose(x, \"bcthw\", \"bthwc\")\n return x\nclass InverseActionPolicy(nn.Module):\n def __init__(\n self,\n action_space,\n pi_head_kwargs=None,\n idm_net_kwargs=None,\n ):\n super().__init__()\n self.action_space = action_space\n self.net = InverseActionNet(**idm_net_kwargs)\n pi_out_size = self.net.output_latent_size()\n pi_head_kwargs = {} if pi_head_kwargs is None else pi_head_kwargs\n self.pi_head = self.make_action_head(pi_out_size=pi_out_size, **pi_head_kwargs)\n def make_action_head(self, **kwargs):"
+ },
+ {
+ "comment": "This code defines a policy class for training reinforcement learning models. It has methods to reset the model's parameters, forward pass to obtain action-value logits and state, and a deterministic prediction method. The code uses PyTorch for tensor operations.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":424-456",
+ "content": " return make_action_head(self.action_space, **kwargs)\n def reset_parameters(self):\n super().reset_parameters()\n self.net.reset_parameters()\n self.pi_head.reset_parameters()\n def forward(self, obs, first: th.Tensor, state_in, **kwargs):\n if isinstance(obs, dict):\n # We don't want to mutate the obs input.\n obs = obs.copy()\n # If special \"mask\" key is in obs,\n # It's for masking the logits.\n # We take it out (the network doesn't need it)\n mask = obs.pop(\"mask\", None)\n else:\n mask = None\n (pi_h, _), state_out = self.net(obs, state_in=state_in, context={\"first\": first}, **kwargs)\n pi_logits = self.pi_head(pi_h, mask=mask)\n return (pi_logits, None, None), state_out\n @th.no_grad()\n def predict(\n self,\n obs,\n deterministic: bool = True,\n **kwargs,\n ):\n (pd, _, _), state_out = self(obs=obs, **kwargs)\n ac = self.pi_head.sample(pd, deterministic=deterministic)"
+ },
+ {
+ "comment": "The code computes the log probability of actions using the pi_head, checks for NaN values, and returns a dictionary containing the log_probability and pd. It also includes functions for initializing the state based on batch size.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/policy.py\":457-466",
+ "content": " log_prob = self.pi_head.logprob(ac, pd)\n assert not th.isnan(log_prob).any()\n result = {\"log_prob\": log_prob, \"pd\": pd}\n return ac, state_out, result\n def initial_state(self, batch_size: int):\n return self.net.initial_state(batch_size)"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/8ad7a98e-71d5-45b5-8d54-39289d955faf.json b/docs/doc/8ad7a98e-71d5-45b5-8d54-39289d955faf.json
new file mode 100644
index 0000000..c2bbbde
--- /dev/null
+++ b/docs/doc/8ad7a98e-71d5-45b5-8d54-39289d955faf.json
@@ -0,0 +1,25 @@
+{
+ "summary": "The code uses a decorator function to compute normalized entropy from categorical head outputs, considering masks and ignoring single-option cases. It also calculates the entropy of categorical and diagonal Gaussian action heads within a module by iterating over key-value pairs and returns average entropy.",
+ "details": [
+ {
+ "comment": "This code defines a decorator function `store_args` that takes a method as input, and when the decorated method is called, it stores its arguments as instance attributes of the class. It also handles default argument values and keyword-only arguments.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/minecraft_util.py\":0-31",
+ "content": "import functools\nimport inspect\nfrom typing import Optional, Tuple\nimport numpy as np\nimport torch\nfrom lib.action_head import (CategoricalActionHead, DiagGaussianActionHead,\n DictActionHead)\ndef store_args(method):\n \"\"\"Stores provided method args as instance attributes.\"\"\"\n argspec = inspect.getfullargspec(method)\n defaults = {}\n if argspec.defaults is not None:\n defaults = dict(zip(argspec.args[-len(argspec.defaults) :], argspec.defaults))\n if argspec.kwonlydefaults is not None:\n defaults.update(argspec.kwonlydefaults)\n arg_names = argspec.args[1:]\n @functools.wraps(method)\n def wrapper(*positional_args, **keyword_args):\n self = positional_args[0]\n # Get default arg values\n args = defaults.copy()\n # Add provided arg values\n for name, value in zip(arg_names, positional_args[1:]):\n args[name] = value\n args.update(keyword_args)\n self.__dict__.update(args)\n return method(*positional_args, **keyword_args)"
+ },
+ {
+ "comment": "This code calculates the normalized entropy from categorical head outputs and applies a mask if necessary. It divides the entropy by the log of the number of possible options, ignoring cases where only one option is available to avoid nonsense results. The count variable keeps track of how many times the condition for ignoring an option has been met.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/minecraft_util.py\":33-54",
+ "content": " return wrapper\ndef get_norm_entropy_from_cat_head(module, name, masks, logits):\n # Note that the mask has already been applied to the logits at this point\n entropy = -torch.sum(torch.exp(logits) * logits, dim=-1)\n if name in masks:\n n = torch.sum(masks[name], dim=-1, dtype=torch.float)\n norm_entropy = entropy / torch.log(n)\n # When the mask only allows one option the normalized entropy makes no sense\n # as it is basically both maximal (the distribution is as uniform as it can be)\n # and minimal (there is no variance at all).\n # A such, we ignore them for purpose of calculating entropy.\n zero = torch.zeros_like(norm_entropy)\n norm_entropy = torch.where(n.eq(1.0), zero, norm_entropy)\n count = n.not_equal(1.0).int()\n else:\n n = torch.tensor(logits.shape[-1], dtype=torch.float)\n norm_entropy = entropy / torch.log(n)\n count = torch.ones_like(norm_entropy, dtype=torch.int)\n # entropy is per-entry, still of size self.output_shape[:-1]; we need to reduce of the rest of it."
+ },
+ {
+ "comment": "This code calculates the entropy of categorical and diagonal Gaussian action heads in a given module and returns the total entropy and counts.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/minecraft_util.py\":55-78",
+ "content": " for _ in module.output_shape[:-1]:\n norm_entropy = norm_entropy.sum(dim=-1)\n count = count.sum(dim=-1)\n return norm_entropy, count\ndef get_norm_cat_entropy(module, masks, logits, template) -> Tuple[torch.Tensor, torch.Tensor]:\n entropy_sum = torch.zeros_like(template, dtype=torch.float)\n counts = torch.zeros_like(template, dtype=torch.int)\n for k, subhead in module.items():\n if isinstance(subhead, DictActionHead):\n entropy, count = get_norm_cat_entropy(subhead, masks, logits[k], template)\n elif isinstance(subhead, CategoricalActionHead):\n entropy, count = get_norm_entropy_from_cat_head(subhead, k, masks, logits[k])\n else:\n continue\n entropy_sum += entropy\n counts += count\n return entropy_sum, counts\ndef get_diag_guassian_entropy(module, logits, template) -> Optional[torch.Tensor]:\n entropy_sum = torch.zeros_like(template, dtype=torch.float)\n count = torch.zeros(1, device=template.device, dtype=torch.int)"
+ },
+ {
+ "comment": "Iterates over each key-value pair in the module, adds entropy from DiagGaussianActionHead or DictActionHead to entropy_sum, and returns the average entropy.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/minecraft_util.py\":79-87",
+ "content": " for k, subhead in module.items():\n if isinstance(subhead, DictActionHead):\n entropy_sum += get_diag_guassian_entropy(subhead, logits[k], template)\n elif isinstance(subhead, DiagGaussianActionHead):\n entropy_sum += module.entropy(logits)\n else:\n continue\n count += 1\n return entropy_sum / count"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/903dcbbc-91dd-401e-890a-99d089f47ff0.json b/docs/doc/903dcbbc-91dd-401e-890a-99d089f47ff0.json
new file mode 100644
index 0000000..f83ef14
--- /dev/null
+++ b/docs/doc/903dcbbc-91dd-401e-890a-99d089f47ff0.json
@@ -0,0 +1,60 @@
+{
+ "summary": "This code organizes player inputs in a video game using action mappings, manages camera actions, handles assertion checks and conversions for different action spaces like buttons, cameras, inventory keys, and factored action space mapping.",
+ "details": [
+ {
+ "comment": "This code defines a class \"ActionMapping\" that maps between the standard Minecraft action space and a new one defined by the user. It uses ordered dictionaries to represent different action groups such as buttons, and requires an odd number of camera bins for initialization.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":0-31",
+ "content": "import abc\nimport itertools\nfrom collections import OrderedDict\nfrom typing import Dict, List\nimport numpy as np\nfrom gym3.types import DictType, Discrete, TensorType\nfrom lib.actions import Buttons\nclass ActionMapping(abc.ABC):\n \"\"\"Class that maps between the standard MC factored action space and a new one you define!\n :param n_camera_bins: Need to specify this to define the original ac space for stats code\n \"\"\"\n # This is the default buttons groups, it can be changed for your action space\n BUTTONS_GROUPS = OrderedDict(\n hotbar=[\"none\"] + [f\"hotbar.{i}\" for i in range(1, 10)],\n fore_back=[\"none\", \"forward\", \"back\"],\n left_right=[\"none\", \"left\", \"right\"],\n sprint_sneak=[\"none\", \"sprint\", \"sneak\"],\n use=[\"none\", \"use\"],\n drop=[\"none\", \"drop\"],\n attack=[\"none\", \"attack\"],\n jump=[\"none\", \"jump\"],\n )\n def __init__(self, n_camera_bins: int = 11):\n assert n_camera_bins % 2 == 1, \"n_camera_bins should be odd\"\n self.n_camera_bins = n_camera_bins"
+ },
+ {
+ "comment": "This code defines an abstract base class for mapping actions to a new space. It includes methods for converting factored actions to the new space, converting actions in the new space back to the factored action space, returning a gym action space for updating the environment, and returning the null or zero action for this action space.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":32-63",
+ "content": " self.camera_null_bin = n_camera_bins // 2\n self.stats_ac_space = DictType(\n **{\n \"buttons\": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),\n \"camera\": TensorType(shape=(2,), eltype=Discrete(n_camera_bins)),\n }\n )\n @abc.abstractmethod\n def from_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts a factored action (ac) to the new space\n :param ac: Dictionary of actions that must have a batch dimension\n \"\"\"\n pass\n @abc.abstractmethod\n def to_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts an action in the new space (ac) to the factored action space.\n :param ac: Dictionary of actions that must have a batch dimension\n \"\"\"\n pass\n @abc.abstractmethod\n def get_action_space_update(self):\n \"\"\"Return a magym (gym3) action space. This will be used to update the env action space.\"\"\"\n pass\n @abc.abstractmethod\n def get_zero_action(self):\n \"\"\"Return the zero or null action for this action space\"\"\""
+ },
+ {
+ "comment": "This function takes in button actions from a factored action space and a list of mutually exclusive buttons. It returns a list indicating which button (or none if no button was pressed) was chosen for each item in the input array, given that each group has the option of 'none'. The function checks if the shape of the input matches the expected number of buttons.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":64-81",
+ "content": " pass\n def factored_buttons_to_groups(self, ac_buttons: np.ndarray, button_group: List[str]) -> List[str]:\n \"\"\"For a mutually exclusive group of buttons in button_group, find which option\n in the group was chosen. Assumes that each button group has the option of 'none'\n meaning that no button in the group was pressed.\n :param ac_buttons: button actions from the factored action space. Should dims [B, len(Buttons.ALL)]\n :param button_group: List of buttons in a mutually exclusive group. Each item in the\n list should appear in Buttons.ALL except for the special case 'none' which means\n no button in the group was pressed. e.g. ['none', 'forward', 'back']. For now\n 'none' must be the first element of button_group\n Returns a list of length B, where each element is an item from button_group.\n \"\"\"\n assert ac_buttons.shape[1] == len(\n Buttons.ALL\n ), f\"There should be {len(Buttons.ALL)} buttons in the factored buttons space\""
+ },
+ {
+ "comment": "Ensures function works only when 'none' is in button_group. Maps non-zero action button indices to corresponding actions, handling special cases of mutual press for forward/back and left/right. Prioritizes later buttons in group if pressed at the same time.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":82-96",
+ "content": " assert button_group[0] == \"none\", \"This function only works if 'none' is in button_group\"\n # Actions in ac_buttons with order according to button_group\n group_indices = [Buttons.ALL.index(b) for b in button_group if b != \"none\"]\n ac_choices = ac_buttons[:, group_indices]\n # Special cases for forward/back, left/right where mutual press means do neither\n if \"forward\" in button_group and \"back\" in button_group:\n ac_choices[np.all(ac_choices, axis=-1)] = 0\n if \"left\" in button_group and \"right\" in button_group:\n ac_choices[np.all(ac_choices, axis=-1)] = 0\n ac_non_zero = np.where(ac_choices)\n ac_choice = [\"none\" for _ in range(ac_buttons.shape[0])]\n # Iterate over the non-zero indices so that if two buttons in a group were pressed at the same time\n # we give priority to the button later in the group. E.g. if hotbar.1 and hotbar.2 are pressed during the same\n # timestep, hotbar.2 is marked as pressed"
+ },
+ {
+ "comment": "This code defines two classes, IDMActionMapping and CameraHierarchicalMapping, which are action mappings used in a video game. The classes define methods to convert actions between factored and non-factored representations, get an action space update, and handle zero actions. These classes seem to be part of a larger system for mapping player inputs to actions in the game environment.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":97-121",
+ "content": " for index, action in zip(ac_non_zero[0], ac_non_zero[1]):\n ac_choice[index] = button_group[action + 1] # the zero'th index will mean no button pressed\n return ac_choice\nclass IDMActionMapping(ActionMapping):\n \"\"\"For IDM, but essentially this is just an identity mapping\"\"\"\n def from_factored(self, ac: Dict) -> Dict:\n return ac\n def to_factored(self, ac: Dict) -> Dict:\n return ac\n def get_action_space_update(self):\n \"\"\"Return a magym (gym3) action space. This will be used to update the env action space.\"\"\"\n return {\n \"buttons\": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),\n \"camera\": TensorType(shape=(2,), eltype=Discrete(self.n_camera_bins)),\n }\n def get_zero_action(self):\n raise NotImplementedError()\nclass CameraHierarchicalMapping(ActionMapping):\n \"\"\"Buttons are joint as in ButtonsJointMapping, but now a camera on/off meta action is added into this joint space.\n When this meta action is triggered, the separate camera head chooses a camera action which is also now a joint space."
+ },
+ {
+ "comment": "This code adds camera meta actions to the BUTTONS_GROUPS and defines functions for mapping between button combinations, indices, and names.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":123-141",
+ "content": " :param n_camera_bins: number of camera bins in the factored space\n \"\"\"\n # Add camera meta action to BUTTONS_GROUPS\n BUTTONS_GROUPS = ActionMapping.BUTTONS_GROUPS.copy()\n BUTTONS_GROUPS[\"camera\"] = [\"none\", \"camera\"]\n BUTTONS_COMBINATIONS = list(itertools.product(*BUTTONS_GROUPS.values())) + [\"inventory\"]\n BUTTONS_COMBINATION_TO_IDX = {comb: i for i, comb in enumerate(BUTTONS_COMBINATIONS)}\n BUTTONS_IDX_TO_COMBINATION = {i: comb for i, comb in enumerate(BUTTONS_COMBINATIONS)}\n def __init__(self, *args, **kwargs):\n super().__init__(*args, **kwargs)\n self.camera_groups = OrderedDict(\n camera_x=[f\"camera_x{i}\" for i in range(self.n_camera_bins)],\n camera_y=[f\"camera_y{i}\" for i in range(self.n_camera_bins)],\n )\n self.camera_combinations = list(itertools.product(*self.camera_groups.values()))\n self.camera_combination_to_idx = {comb: i for i, comb in enumerate(self.camera_combinations)}\n self.camera_idx_to_combination = {i: comb for i, comb in enumerate(self.camera_combinations)}"
+ },
+ {
+ "comment": "Code chunk sets up arrays for button and camera action mappings.\nThe code defines button and camera indices, initializes arrays to store the factored actions for each joint action, and begins processing the button combinations.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":142-160",
+ "content": " self.camera_null_idx = self.camera_combination_to_idx[\n (f\"camera_x{self.camera_null_bin}\", f\"camera_y{self.camera_null_bin}\")\n ]\n self._null_action = {\n \"buttons\": self.BUTTONS_COMBINATION_TO_IDX[tuple(\"none\" for _ in range(len(self.BUTTONS_GROUPS)))]\n }\n self._precompute_to_factored()\n def _precompute_to_factored(self):\n \"\"\"Precompute the joint action -> factored action matrix.\"\"\"\n button_dim = self.stats_ac_space[\"buttons\"].size\n self.BUTTON_IDX_TO_FACTORED = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION), button_dim), dtype=int)\n self.BUTTON_IDX_TO_CAMERA_META_OFF = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION)), dtype=bool)\n self.CAMERA_IDX_TO_FACTORED = np.zeros((len(self.camera_idx_to_combination), 2), dtype=int)\n # Pre compute Buttons\n for jnt_ac, button_comb in self.BUTTONS_IDX_TO_COMBINATION.items():\n new_button_ac = np.zeros(len(Buttons.ALL), dtype=\"i\")\n if button_comb == \"inventory\":"
+ },
+ {
+ "comment": "Code is creating factored representations of action and camera combinations for each joint. It initializes new_button_ac to 1 for all inventory buttons, then checks if any other groups are selected and assigns those indices to 1 in new_button_ac. If the last combination is not \"camera\", it sets the camera_meta_off flag. Then it creates new_camera_ac with indices based on the camera group combinations and stores these factored representations for both action and camera in their respective dictionaries.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":161-179",
+ "content": " new_button_ac[Buttons.ALL.index(\"inventory\")] = 1\n else:\n for group_choice in button_comb[:-1]: # Last one is camera\n if group_choice != \"none\":\n new_button_ac[Buttons.ALL.index(group_choice)] = 1\n if button_comb[-1] != \"camera\": # This means camera meta action is off\n self.BUTTON_IDX_TO_CAMERA_META_OFF[jnt_ac] = True\n self.BUTTON_IDX_TO_FACTORED[jnt_ac] = new_button_ac\n # Pre compute camera\n for jnt_ac, camera_comb in self.camera_idx_to_combination.items():\n new_camera_ac = np.ones((2), dtype=\"i\") * self.camera_null_bin\n new_camera_ac[0] = self.camera_groups[\"camera_x\"].index(camera_comb[0])\n new_camera_ac[1] = self.camera_groups[\"camera_y\"].index(camera_comb[1])\n self.CAMERA_IDX_TO_FACTORED[jnt_ac] = new_camera_ac\n def from_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts a factored action (ac) to the new space. Assumes ac has a batch dim\"\"\""
+ },
+ {
+ "comment": "This code is performing an assertion check to ensure that the \"camera\" and \"buttons\" labels have the correct dimensions. It then creates a dictionary of button choices for each group except camera, sets the camera action based on whether a non-null camera action was given, and finally generates new arrays of button and camera actions based on the choices.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":180-199",
+ "content": " assert ac[\"camera\"].ndim == 2, f\"bad camera label, {ac['camera']}\"\n assert ac[\"buttons\"].ndim == 2, f\"bad buttons label, {ac['buttons']}\"\n # Get button choices for everything but camera\n choices_by_group = OrderedDict(\n (k, self.factored_buttons_to_groups(ac[\"buttons\"], v)) for k, v in self.BUTTONS_GROUPS.items() if k != \"camera\"\n )\n # Set camera \"on off\" action based on whether non-null camera action was given\n camera_is_null = np.all(ac[\"camera\"] == self.camera_null_bin, axis=1)\n choices_by_group[\"camera\"] = [\"none\" if is_null else \"camera\" for is_null in camera_is_null]\n new_button_ac = []\n new_camera_ac = []\n for i in range(ac[\"buttons\"].shape[0]):\n # Buttons\n key = tuple([v[i] for v in choices_by_group.values()])\n if ac[\"buttons\"][i, Buttons.ALL.index(\"inventory\")] == 1:\n key = \"inventory\"\n new_button_ac.append(self.BUTTONS_COMBINATION_TO_IDX[key])\n # Camera -- inventory is also exclusive with camera"
+ },
+ {
+ "comment": "This code is converting an action in the new space (ac) to the factored action space. It first checks if the \"inventory\" key is present, and if so, uses a specific key format. For other keys, it uses a different key format. Then it appends the camera indices to a list. The function returns a dictionary with buttons and cameras in the new action space. If the input action has a batch dimension, the code asserts that the shape of both \"camera\" and \"buttons\" are 1, squeezes them, maps the button indices to factored action space, calculates camera offsets, maps the camera indices to factored action space, and replaces the null camera values with \"camera_null_bin\".",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":200-222",
+ "content": " if key == \"inventory\":\n key = (\n f\"camera_x{self.camera_null_bin}\",\n f\"camera_y{self.camera_null_bin}\",\n )\n else:\n key = (f\"camera_x{ac['camera'][i][0]}\", f\"camera_y{ac['camera'][i][1]}\")\n new_camera_ac.append(self.camera_combination_to_idx[key])\n return dict(\n buttons=np.array(new_button_ac)[:, None],\n camera=np.array(new_camera_ac)[:, None],\n )\n def to_factored(self, ac: Dict) -> Dict:\n \"\"\"Converts an action in the new space (ac) to the factored action space. Assumes ac has a batch dim\"\"\"\n assert ac[\"camera\"].shape[-1] == 1\n assert ac[\"buttons\"].shape[-1] == 1\n new_button_ac = self.BUTTON_IDX_TO_FACTORED[np.squeeze(ac[\"buttons\"], -1)]\n camera_off = self.BUTTON_IDX_TO_CAMERA_META_OFF[np.squeeze(ac[\"buttons\"], -1)]\n new_camera_ac = self.CAMERA_IDX_TO_FACTORED[np.squeeze(ac[\"camera\"], -1)]\n new_camera_ac[camera_off] = self.camera_null_bin"
+ },
+ {
+ "comment": "This code defines a class with three methods. The first method returns a dictionary containing the \"buttons\" and \"camera\" actions. The second method specifies the action space update, defining the shape and type for both \"camera\" and \"buttons\". The third method returns a null action.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/action_mapping.py\":224-233",
+ "content": " return dict(buttons=new_button_ac, camera=new_camera_ac)\n def get_action_space_update(self):\n return {\n \"camera\": TensorType(shape=(1,), eltype=Discrete(len(self.camera_combinations))),\n \"buttons\": TensorType(shape=(1,), eltype=Discrete(len(self.BUTTONS_COMBINATIONS))),\n }\n def get_zero_action(self):\n return self._null_action"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/92354371-a8e4-4050-86ea-b7f99e23c23c.json b/docs/doc/92354371-a8e4-4050-86ea-b7f99e23c23c.json
new file mode 100644
index 0000000..d0fd2f3
--- /dev/null
+++ b/docs/doc/92354371-a8e4-4050-86ea-b7f99e23c23c.json
@@ -0,0 +1,135 @@
+{
+ "summary": "The comments discuss Minecraft AI model training, reinforcement learning features, and limited resource data collection for the MineRL BASALT 2022 competition, as well as building a house in under 10 minutes without harming villages.",
+ "details": [
+ {
+ "comment": "This code provides instructions for setting up and running the Video PreTraining (VPT) model, which learns to act by watching unlabeled online videos. It also includes a link to the paper describing the methodology and provides information on where to find more resources related to VPT. The code includes commands to install pre-requisites, requirements, and run the agent models. Additionally, it lists various pre-trained Minecraft models with their respective model files and weights.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":2-34",
+ "content": "# Video-Pre-Training\nVideo PreTraining (VPT): Learning to Act by Watching Unlabeled Online Videos\n> :page_facing_up: [Read Paper](https://cdn.openai.com/vpt/Paper.pdf) \\\n :mega: [Blog Post](https://openai.com/blog/vpt) \\\n :space_invader: [MineRL Environment](https://github.com/minerllabs/minerl) (note version 1.0+ required) \\\n :checkered_flag: [MineRL BASALT Competition](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition)\n# Running agent models\nInstall pre-requirements for [MineRL](https://minerl.readthedocs.io/en/latest/tutorials/index.html).\nThen install requirements with:\n```\npip install git+https://github.com/minerllabs/minerl\npip install -r requirements.txt\n```\nTo run the code, call\n```\npython run_agent.py --model [path to .model file] --weights [path to .weight file]\n```\nAfter loading up, you should see a window of the agent playing Minecraft.\n# Agent Model Zoo\nBelow are the model files and weights files for various pre-trained Minecraft models.\nThe 1x, 2x and 3x model files correspond to their respective model weights width."
+ },
+ {
+ "comment": "This code provides links to download pre-trained models for Minecraft reinforcement learning, trained using behavioral cloning on video demonstrations. Foundational models are trained across all videos in a single training run, while house and early game models refine further with specific data sets.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":36-50",
+ "content": "* [:arrow_down: 1x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.model)\n* [:arrow_down: 2x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/2x.model)\n* [:arrow_down: 3x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.model)\n### Demonstration Only - Behavioral Cloning\nThese models are trained on video demonstrations of humans playing Minecraft\nusing behavioral cloning (BC) and are more general than later models which\nuse reinforcement learning (RL) to further optimize the policy.\nFoundational models are trained across all videos in a single training run\nwhile house and early game models refine their respective size foundational\nmodel further using either the housebuilding contractor data or early game video\nsub-set. See the paper linked above for more details.\n#### Foundational Model :chart_with_upwards_trend:\n * [:arrow_down: 1x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.weights)"
+ },
+ {
+ "comment": "This code provides links to pre-trained models for Minecraft gameplay and mentions fine-tuned models from specific starting points. Additionally, it introduces models with environment interactions that are trained using a reward function aimed at obtaining diamond pickaxes.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":51-64",
+ "content": " * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-2x.weights)\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.weights)\n#### Fine-Tuned from House :chart_with_upwards_trend:\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-house-3x.weights)\n#### Fine-Tuned from Early Game :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-2x.weights)\n * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-3x.weights)\n### Models With Environment Interactions\nThese models further refine the above demonstration based models with a reward\nfunction targeted at obtaining diamond pickaxes. While less general then the behavioral\ncloning models, these models have the benefit of interacting with the environment"
+ },
+ {
+ "comment": "This code provides pre-trained models for Minecraft gameplay using reinforcement learning and an Inverse Dynamics Model (IDM). The models are trained with different reward functions and excel at progressing quickly through the tech tree. Users can download the models and weights from specified URLs to run the IDM, which predicts player actions in a video recording.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":65-84",
+ "content": "using a reward function and excel at progressing through the tech tree quickly.\nSee the paper for more information\non how they were trained and the exact reward schedule.\n#### RL from Foundation :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-foundation-2x.weights)\n#### RL from House :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-house-2x.weights)\n#### RL from Early Game :chart_with_upwards_trend:\n * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-early-game-2x.weights)\n# Running Inverse Dynamics Model (IDM)\nIDM aims to predict what actions player is taking in a video recording.\nSetup:\n* Install requirements: `pip install -r requirements.txt`\n* Download the IDM model [.model :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.model) and [.weight :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.weights) files"
+ },
+ {
+ "comment": "This code provides a demonstration of the Inverse Dynamics Model (IDM) using provided contractor recordings for video and actions. The model is run with specific file paths, and a window displays the predicted and true actions side-by-side. Note that this script is designed for demonstration purposes only, not for practical implementation.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":85-98",
+ "content": "* For demonstration purposes, you can use the contractor recordings shared below to. For this demo we use\n [this .mp4](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4)\n and [this associated actions file (.jsonl)](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl).\nTo run the model with above files placed in the root directory of this code:\n```\npython run_inverse_dynamics_model.py -weights 4x_idm.weights --model 4x_idm.model --video-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4 --jsonl-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl\n```\nA window should pop up which shows the video frame-by-frame, showing the predicted and true (recorded) actions side-by-side on the left.\nNote that `run_inverse_dynamics_model.py` is designed to be a demo of the IDM, not code to put it into practice.\n# Using behavioural cloning to fine-tune the models"
+ },
+ {
+ "comment": "This code is a demonstration of video pre-training using behavioral cloning. It requires installing dependencies, downloading model and data files, and then fine-tuning the model with custom weights for better performance.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":100-113",
+ "content": "**Disclaimer:** This code is a rough demonstration only and not an exact recreation of what original VPT paper did (but it contains some preprocessing steps you want to be aware of)! As such, do not expect replicate the original experiments with this code. This code has been designed to be run-able on consumer hardware (e.g., 8GB of VRAM).\nSetup:\n* Install requirements: `pip install -r requirements.txt`\n* Download `.weights` and `.model` file for model you want to fine-tune.\n* Download contractor data (below) and place the `.mp4` and `.jsonl` files to the same directory (e.g., `data`). With default settings, you need at least 12 recordings.\nIf you downloaded the \"1x Width\" models and placed some data under `data` directory, you can perform finetuning with\n```\npython behavioural_cloning.py --data-dir data --in-model foundation-model-1x.model --in-weights foundation-model-1x.weights --out-weights finetuned-1x.weights\n```\nYou can then use `finetuned-1x.weights` when running the agent. You can change the training settings at the top of `behavioural_cloning.py`."
+ },
+ {
+ "comment": "This code is a README.md file that lists the major limitations of the pre-training, describes a contractor demonstrations section, and provides details about the versions used for this project. It also mentions how different versions were used to change modalities or prompt changes due to contractor's internal questions and clarifications.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":115-135",
+ "content": "Major limitations:\n- Only trains single step at the time, i.e., errors are not propagated through timesteps.\n- Computes gradients one sample at a time to keep memory use low, but also slows down the code.\n# Contractor Demonstrations\n### Versions\nOver the course of the project we requested various demonstrations from contractors\nwhich we release as index files below. In general, major recorder versions change for a new\nprompt or recording feature while bug-fixes were represented as minor version changes.\nHowever, some\nrecorder versions we asked contractors to change their username when recording particular\nmodalities. Also, as contractors internally ask questions, clarification from one contractor may\nresult in a behavioral change in the other contractor. It is intractable to share every contractor's\nview for each version, but we've shared the prompts and major clarifications for each recorder\nversion where the task changed significantly.\n \n Initial Prompt \n We are collect"
+ },
+ {
+ "comment": "This code is a list of available versions for the Minecraft AI model training program. It includes a description of the features in each version and provides links to download the modified Minecraft version for training.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":135-151",
+ "content": "ing data for training AI models in Minecraft. You'll need to install java, download the modified version of minecraft (that collects and uploads your play data), and play minecraft survival mode! Paid per hour of gameplay. Prior experience in minecraft not. necessary. We do not collect any data that is unrelated to minecraft from your computer.\n \nThe following is a list of the available versions:\n* **6.x** Core recorder features subject to change [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_6xx_Jun_29.json)\n * 6.9 First feature complete recorder version\n * 6.10 Fixes mouse scaling on Mac when gui is open\n * 6.11 Tracks the hotbar slot\n * 6.13 Sprinting, swap-hands, ... (see commits below)\n \n Commits \n * improve replays that are cut in the middle of gui; working on riding boats / replays cut in the middle of a run\n * improve replays by adding dwheel action etc, also, loosen up replay tolerances\n * opencv version bump"
+ },
+ {
+ "comment": "This code seems to be part of a README file for a Minecraft-based reinforcement learning project. The comments suggest that the developers are working on various features such as swapping hands and implementing replaying from running and sprinting. They also mention not recording sprinting data, checking for mouse button numbers, and handling errors related to recording mouse or keyboard input as null. Additionally, there is a note about prompting contractors to collect early game data (first 30 minutes) in newer worlds to help improve the AI's performance in those stages of the game.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":152-167",
+ "content": " * add swap hands, and recording of the step timestamp\n * implement replaying from running and sprinting and tests\n * do not record sprinting (can use stats for that)\n * check for mouse button number, ignore >2\n * handle the errors when mouse / keyboard are recorded as null\n \n* **7.x** Prompt changes [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_7xx_Apr_6.json)\n * 7.6 Bump version for internal tracking\n \n Additional ask to contractors \n Right now, early game data is especially valuable to us. As such, we request that at least half of the data you upload is from the first 30 minutes of the game. This means that, for every hour of gameplay you spend in an older world, we ask you to play two sessions in which you create a new world and play for 30 minutes. You can play for longer in these worlds, but only the first 30 minutes counts as early game data.\n \n* **8.x** :clipboard"
+ },
+ {
+ "comment": "This code is providing instructions for a new task, \"Building a Simple House\". The task involves building a house using primarily wood, dirt, and sand, along with crafted wood items. The structure can be decorated as desired but should not use difficult materials such as stone. It also provides information about the need to update the recording script and use a new world each time.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":167-181",
+ "content": ": House Building from Scratch Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_8xx_Jun_29.json)\n \n Changes and Prompt \n Hi all! Thank you for your hard work so far.\n This week we would like to have you all collect data on a specific task.\n This comes with a new recorder version 8.0 which you will need to update your recording script to download.\n This week we would like you to use a new world each time you play, so loading existing worlds is disabled.\n The new task is as follows:\n Starting in a new world, build a simple house in 10-15 minutes. This corresponds to one day and a bit of the night. Please use primarily wood, dirt, and sand, as well as crafted wood items such as doors, fences, ect. in constructing your house. Avoid using difficult items such as stone. Aside from those constraints, you may decorate the structure you build as you wish. It does not need to have any specific furniture. For example, it is "
+ },
+ {
+ "comment": "This code is for a Minecraft demonstration task where the player must build a house within 20 minutes. If not finished, they should exit and continue with another demo. Narration is required throughout. The demonstrations need to be completed in one session without resuming after exiting or going back to the main menu. Save files will be saved locally for viewing later but may be used for future tasks if space permits. The task requires a simple house without sleep areas and avoids cobblestone, stone, and granite. Stone tools are acceptable but time-limited. The episode ends after 10 realtime minutes. World names follow the format \"build-house-15-min-\" + random number.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":181-193",
+ "content": "OK if there is no bed in your house. If you have not finished the house by the sunrise (20 minutes) please exit and continue to another demonstration. Please continue to narrate what you are doing while completing this task.\n Since you will be unable to resume building after exiting Minecraft or going back to the main menu, you must finish these demonstrations in one session. Pausing via the menu is still supported. If you want to view your creations later, they will be saved locally so you can look at them in your own time. We may use these save files in a future task so if you have space, please leave the save files titled \u201cbuild-house-15-min-\u201c.\n For this week try to avoid all cobblestone / stone / granite\n For this week we just want simple houses without sleeping. If 10 minutes is too short, let us know and we can think of how to adjust!\n Stone tools are ok but I think you may run-out of time\n Changes:\n * Timer ends episode after 10 realtime minutes\n * Worlds are named: `\"build-house-15-min-\" + Math.abs(random.nextInt());`"
+ },
+ {
+ "comment": "Introduces 10-minute timer for task completion.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":195-214",
+ "content": " \n * Note this version introduces 10-minute timer that ends the episode. It\n cut experiments short occasionally and was fixed in 9.1\n * 8.0 Simple House\n * 8.2 Update upload script\n* **9.x** :clipboard: House Building from Random Starting Materials Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_9xx_Jun_29.json)\n \n Changes and Prompt \n You now will have 10 minutes to use the provided resources to build your house / home / or structure. In this version, the experiment will time out after 10 minutes if you are not complete so don't be alarmed if that happens, it is intentional.\n No need to use up all the resources! It's ok to collect a few things but spend the majority of the time placing blocks (the act of placing seems to be harder to learn)\n Changes:\n * Worlds are named: `\"design-house-10-min-\" + Math.abs(random.nextInt());`\n * Starting inventory given by code below\n \n "
+ },
+ {
+ "comment": "This code generates a random starting inventory for the player by adding basic tools, some random items, and building blocks to their hotbar and inventory.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":215-236",
+ "content": " Random Starting Inventory Code \n ```java\n Random random = new Random();\n List hotbar = new ArrayList<>();\n List inventory = new ArrayList<>();\n // Ensure we give the player the basic tools in their hot bar\n hotbar.add(new ItemStack(Items.STONE_AXE));\n hotbar.add(new ItemStack(Items.STONE_PICKAXE));\n hotbar.add(new ItemStack(Items.STONE_SHOVEL));\n hotbar.add(new ItemStack(Items.CRAFTING_TABLE));\n // Add some random items to the player hotbar as well\n addToList(hotbar, inventory, Items.TORCH, random.nextInt(16) * 2 + 2);\n // Next add main building blocks\n if (random.nextFloat() < 0.7) {\n addToList(hotbar, inventory, Items.OAK_FENCE_GATE, random.nextInt(5));\n addToList(hotbar, inventory, Items.OAK_FENCE, random.nextInt(5) * 64);\n addToList(hotbar, inventory, Items.OAK_DOOR, random.nextInt(5));\n addToList(hotbar, inventory, Items.OAK_TRAPDOOR, random.nextInt(2) * 2);"
+ },
+ {
+ "comment": "This code randomly selects items to add to the hotbar based on the type of biome the player is in. It uses different lists of items for oak and birch biomes.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":237-249",
+ "content": " addToList(hotbar, inventory, Items.OAK_PLANKS, random.nextInt(3) * 64 + 128);\n addToList(hotbar, inventory, Items.OAK_SLAB, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.OAK_STAIRS, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.OAK_LOG, random.nextInt(2) * 32);\n addToList(hotbar, inventory, Items.OAK_PRESSURE_PLATE, random.nextInt(5));\n } else {\n addToList(hotbar, inventory, Items.BIRCH_FENCE_GATE, random.nextInt(5));\n addToList(hotbar, inventory, Items.BIRCH_FENCE, random.nextInt(5) * 64);\n addToList(hotbar, inventory, Items.BIRCH_DOOR, random.nextInt(5));\n addToList(hotbar, inventory, Items.BIRCH_TRAPDOOR, random.nextInt(2) * 2);\n addToList(hotbar, inventory, Items.BIRCH_PLANKS, random.nextInt(3) * 64 + 128);\n addToList(hotbar, inventory, Items.BIRCH_SLAB, random.nextInt(3) * 64);\n addToList(hotbar, inventory, Items.BIRCH_STAIRS, random.nextInt(3) * 64);"
+ },
+ {
+ "comment": "This code is adding a variety of items to the player's inventory. It uses random number generation to decide how many of each item to add, with some items having a higher chance of appearing than others (e.g., beds have a 20% chance). This helps create a diverse and unpredictable inventory for the player to work with.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":250-262",
+ "content": " addToList(hotbar, inventory, Items.BIRCH_LOG, random.nextInt(2) * 32);\n addToList(hotbar, inventory, Items.BIRCH_PRESSURE_PLATE, random.nextInt(5));\n }\n // Now add some random decoration items to the player inventory\n addToList(hotbar, inventory, Items.CHEST, random.nextInt(3));\n addToList(hotbar, inventory, Items.FURNACE, random.nextInt(2) + 1);\n addToList(hotbar, inventory, Items.GLASS_PANE, random.nextInt(5) * 4);\n addToList(hotbar, inventory, Items.WHITE_BED, (int) (random.nextFloat() + 0.2)); // Bed 20% of the time\n addToList(hotbar, inventory, Items.PAINTING, (int) (random.nextFloat() + 0.1)); // Painting 10% of the time\n addToList(hotbar, inventory, Items.FLOWER_POT, (int) (random.nextFloat() + 0.1) * 4); // 4 Flower pots 10% of the time\n addToList(hotbar, inventory, Items.OXEYE_DAISY, (int) (random.nextFloat() + 0.1) * 4); // 4 Oxeye daisies 10% of the time\n addToList(hotbar, inventory, Items.POPPY, (int) (random.nextFloat() + 0.1) * 4); // 4 Poppies 10% of the time"
+ },
+ {
+ "comment": "This code adds 4 sunflowers to the player's inventory 10% of the time, shuffles both the hotbar and inventory slots, and then gives the player the items if they are the same as the current player.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":263-287",
+ "content": " addToList(hotbar, inventory, Items.SUNFLOWER, (int) (random.nextFloat() + 0.1) * 4); // 4 Sunflowers 10% of the time\n // Shuffle the hotbar slots and inventory slots\n Collections.shuffle(hotbar);\n Collections.shuffle(inventory);\n // Give the player the items\n this.mc.getIntegratedServer().getPlayerList().getPlayers().forEach(p -> {\n if (p.getUniqueID().equals(this.getUniqueID())) {\n hotbar.forEach(p.inventory::addItemStackToInventory);\n inventory.forEach(p.inventory::addItemStackToInventory);\n }\n });\n ```\n \n * 9.0 First version\n * 9.1 Fixed timer bug\n* **10.0** :clipboard: Obtain Diamond Pickaxe Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_10xx_Jun_29.json)\n \n Changes and Prompt \n Prompt:\n For this new task we have given you 20 minutes to craft a diamond pickaxe. We ask that you do not try to search for vi"
+ },
+ {
+ "comment": "Code discusses time limits for finding diamonds in Minecraft, changing version numbers, and requesting contractors to chop trees while recording gameplay with a specific username.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":287-303",
+ "content": "llages or other ways of getting diamonds, but if you are spawned in view of one, or happen to fall into a cave structure feel free to explore it for diamonds.\n If 20 min is not enough that is OK. It will happen on some seeds because of bad luck. Please do not use glitches to find the diamonds.\n Changes:\n * change to 20 minute time limit\n * _don't count gui time as part of the time limit_\n * World are named `\"collect-diamond-pickaxe-15min-\" + Math.abs(random.nextInt());`\n \nSometimes we asked the contractors to signify other tasks besides changing the version. This\nprimarily occurred in versions 6 and 7 as 8, 9 and 10 are all task specific.\n\nPrompt to contractors (click to show) \nAnother request about additional time - please use some of it to chop trees. Specifically, please start the recorder by adding --username treechop argument to the script (i.e. use play --username treechop on windows, ./play.sh --username treechop on osx/linux), and spend some time"
+ },
+ {
+ "comment": "The code is providing instructions for the video game Minecraft, asking testers to spend a portion of their time chopping trees and using easily obtainable tools in a new world. It also mentions restrictions on the contractor's environment, such as windowed mode, downsampling, and disabling options like brightness and rendering settings.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":303-310",
+ "content": " chopping trees! Getting wooden or stone tools is ok, but please spend the majority of the with username treechop specifically chopping. I did it myself for about 15 minutes, and it does get boring pretty quickly, so I don't expect you to do it all the time, but please do at least a little bit of chopping. Feel free to play normally the rest of the time (but please restart without --username treechop argument when you are not chopping)\nHowever, it is preferable that you start a new world though, and use only the tools that are easily obtainable in that world. I'll see what I can do about getting player an iron axe - that sounds reasonable, and should not be hard, but will require a code update.\n \n### Environment\nWe restrict the contractors to playing Minecraft in windowed mode at 720p which we downsample at 20hz to 360p\nto minimize space. We also disabled the options screen to prevent the contractor from\nchanging things such as brightness, or rendering options. We ask contractors not to press keys"
+ },
+ {
+ "comment": "The code provides information about the format of demonstrations, which are broken into 5-minute segments containing compressed screen observations, actions, environment statistics, and checkpoint save files. It also mentions that there may be missing chunks from continuous demonstrations due to network errors. The index files for each version are provided as JSON files with a list of relative paths following a specific format.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":311-337",
+ "content": "such as f3 which shows a debug overlay, however some contractors may still do this.\n### Data format\nDemonstrations are broken up into up to 5 minute segments consisting of a series of\ncompressed screen observations, actions, environment statistics, and a checkpoint\nsave file from the start of the segment. Each relative path in the index will\nhave all the files for that given segment, however if a file was dropped while\nuploading, the corresponding relative path is not included in the index therefore\nthere may be missing chunks from otherwise continuous demonstrations.\nIndex files are provided for each version as a json file:\n```json\n{\n \"basedir\": \"https://openaipublic.blob.core.windows.net/data/\",\n \"relpaths\": [\n \"8.0/cheeky-cornflower-setter-74ae6c2eae2e-20220315-122354\",\n ...\n ]\n}\n```\nRelative paths follow the following format:\n* `/---`\n> Note that due to network errors, some segments may be missing from otherwise\ncontinuous demonstrations."
+ },
+ {
+ "comment": "The code defines the structure of files required by the data loader, including video observation file, action file, options file, and checkpoint save file. The action file contains a list of individual action dictionaries in JSON format, each representing mouse and keyboard inputs, game status, player position, and inventory for a specific tick or frame.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":339-387",
+ "content": "Your data loader can then find following files:\n* Video observation: `/.mp4`\n* Action file: `/.jsonl`\n* Options file: `/-options.json`\n* Checkpoint save file: `/.zip`\nThe action file is **not** a valid json object: each line in\naction file is an individual action dictionary.\nFor v7.x, the actions are in form\n```json\n{\n \"mouse\": {\n \"x\": 274.0,\n \"y\": 338.0,\n \"dx\": 0.0,\n \"dy\": 0.0,\n \"scaledX\": -366.0,\n \"scaledY\": -22.0,\n \"dwheel\": 0.0,\n \"buttons\": [],\n \"newButtons\": []\n },\n \"keyboard\": {\n \"keys\": [\n \"key.keyboard.a\",\n \"key.keyboard.s\"\n ],\n \"newKeys\": [],\n \"chars\": \"\"\n },\n \"isGuiOpen\": false,\n \"isGuiInventory\": false,\n \"hotbar\": 4,\n \"yaw\": -112.35006,\n \"pitch\": 8.099996,\n \"xpos\": 841.364694513396,\n \"ypos\": 63.0,\n \"zpos\": 24.956354839537802,\n \"tick\": 0,\n \"milli\": 1649575088006,\n \"inventory\": [\n {\n \"type\": \"oak_door\",\n \"quantity\": 3\n },\n {\n \"type\": \"oak_planks\",\n \"quantity\": 59"
+ },
+ {
+ "comment": "This code represents a JSON object containing game state information and player actions for Minecraft. It includes a list of items in the player's inventory, server tick data, and various statistics tracking the player's actions. The dataset is collected for the MineRL BASALT 2022 competition with around 150GB of data per task.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":388-417",
+ "content": " },\n {\n \"type\": \"stone_pickaxe\",\n \"quantity\": 1\n },\n {\n \"type\": \"oak_planks\",\n \"quantity\": 64\n }\n ],\n \"serverTick\": 6001,\n \"serverTickDurationMs\": 36.3466,\n \"stats\": {\n \"minecraft.custom:minecraft.jump\": 4,\n \"minecraft.custom:minecraft.time_since_rest\": 5999,\n \"minecraft.custom:minecraft.play_one_minute\": 5999,\n \"minecraft.custom:minecraft.time_since_death\": 5999,\n \"minecraft.custom:minecraft.walk_one_cm\": 7554,\n \"minecraft.use_item:minecraft.oak_planks\": 5,\n \"minecraft.custom:minecraft.fall_one_cm\": 269,\n \"minecraft.use_item:minecraft.glass_pane\": 3\n }\n}\n```\n# BASALT 2022 dataset\nWe also collected a dataset of demonstrations for the [MineRL BASALT 2022](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition) competition, with around 150GB of data per task.\n**Note**: To avoid confusion with the competition rules, the action files (.jsonl) have been stripped of information that is not allowed in the competition. We will upload unmodified dataset after the competition ends."
+ },
+ {
+ "comment": "FindCave: Look for a cave and quit the game when inside one. No digging from surface. Timelimit: 3 minutes. Example recordings: https://www.youtube.com/watch?v=TclP_ozH-eg\n\nMakeWaterfall: Spawn in mountainous area, build waterfall, take a scenic picture, and quit the game. Timelimit: 5 minutes.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":419-438",
+ "content": "* **FindCave** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/find-cave-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n Look around for a cave. When you are inside one, quit the game by opening main menu and pressing \"Save and Quit To Title\".\n You are not allowed to dig down from the surface to find a cave.\n Timelimit: 3 minutes.\n Example recordings: https://www.youtube.com/watch?v=TclP_ozH-eg\n ```\n \n* **MakeWaterfall** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/waterfall-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n After spawning in a mountainous area with a water bucket and various tools, build a beautiful waterfall and then reposition yourself to \u201ctake a scenic picture\u201d of the same waterfall, and then quit the game by opening the menu and selecting \"Save and Quit to Title\"\n Timelimit: 5 minutes."
+ },
+ {
+ "comment": "This code provides a prompt for contractors to build an animal pen next to a village house using specific animals and materials, without harming the village.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":439-450",
+ "content": " Example recordings: https://youtu.be/NONcbS85NLA\n ```\n \n* **MakeVillageAnimalPen** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/pen-animals-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n After spawning in a village, build an animal pen next to one of the houses in a village. Use your fence posts to build one animal pen that contains at least two of the same animal. (You are only allowed to pen chickens, cows, pigs, sheep or rabbits.) There should be at least one gate that allows players to enter and exit easily. The animal pen should not contain more than one type of animal. (You may kill any extra types of animals that accidentally got into the pen.) Don\u2019t harm the village.\n After you are done, quit the game by opening the menu and pressing \"Save and Quit to Title\".\n You may need to terraform the area around a house to build a pen. When we say not to harm the village, e"
+ },
+ {
+ "comment": "The code describes tasks for AI to complete in the game of Minecraft. Tasks include not harming animals or damaging existing structures while building a new house, and giving a brief tour of the constructed house. There is also a time limit of 5 minutes and an example recording provided for reference.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":450-466",
+ "content": "xamples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers. Animal pens must have a single type of animal: pigs, cows, sheep, chicken or rabbits.\n The food items can be used to lure in the animals: if you hold seeds in your hand, this attracts nearby chickens to you, for example.\n Timelimit: 5 minutes.\n Example recordings: https://youtu.be/SLO7sep7BO8\n ```\n \n* **BuildVillageHouse** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/build-house-Jul-28.json)\n * \n Prompt to contractors (click to show) \n ```\n Taking advantage of the items in your inventory, build a new house in the style of the village (random biome), in an appropriate location (e.g. next to the path through the village), without harming the village in the process.\n Then give a brief tour of the house (i.e. spin around slowly such that all of the walls and the roof are visible).\n "
+ },
+ {
+ "comment": "Instructions for building a house in Minecraft within 10 minutes, without causing harm to the village.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":466-486",
+ "content": " * You start with a stone pickaxe and a stone axe, and various building blocks. It\u2019s okay to break items that you misplaced (e.g. use the stone pickaxe to break cobblestone blocks).\n * You are allowed to craft new blocks.\n Please spend less than ten minutes constructing your house.\n You don\u2019t need to copy another house in the village exactly (in fact, we\u2019re more interested in having slight deviations, while keeping the same \"style\"). You may need to terraform the area to make space for a new house.\n When we say not to harm the village, examples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers.\n After you are done, quit the game by opening the menu and pressing \"Save and Quit to Title\".\n Timelimit: 12 minutes.\n Example recordings: https://youtu.be/WeVqQN96V_g\n ```\n \n# Contribution\nThis was a large effort by a dedicated team at OpenAI:\n[Bowen Baker](https://github.com/bowenbaker),\n[Ilge Akkaya](https://github.com/ilge),"
+ },
+ {
+ "comment": "This code is acknowledging the authors of the model and crediting Anssi Kanervisto for preparing a minimal version of the code to be used in the MineRL BASALT competition.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/README.md\":487-496",
+ "content": "[Peter Zhokhov](https://github.com/pzhokhov),\n[Joost Huizinga](https://github.com/JoostHuizinga),\n[Jie Tang](https://github.com/jietang),\n[Adrien Ecoffet](https://github.com/AdrienLE),\n[Brandon Houghton](https://github.com/brandonhoughton),\n[Raul Sampedro](https://github.com/samraul),\nJeff Clune\nThe code here represents a minimal version of our model code which was\nprepared by [Anssi Kanervisto](https://github.com/miffyli) and others so that these models could be used as\npart of the MineRL BASALT competition."
+ }
+ ]
+}
\ No newline at end of file
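The README chunks cached above describe two on-disk formats: the per-version index files (a JSON object with a `basedir` and a list of `relpaths`, where each relpath has matching `.mp4` and `.jsonl` files) and the action files, which are not single JSON documents but one action dictionary per line. As a rough illustration of consuming those formats (a hypothetical helper, not code from the repository; the function names are made up), a loader might look like this:

```python
import json

def load_index(index_path):
    """Read an index file ({"basedir": ..., "relpaths": [...]}) and return
    (video_url, actions_url) pairs, one per demonstration segment."""
    with open(index_path) as f:
        index = json.load(f)
    base = index["basedir"].rstrip("/")
    return [(f"{base}/{rp}.mp4", f"{base}/{rp}.jsonl") for rp in index["relpaths"]]

def load_actions(jsonl_path):
    """The .jsonl action file is not a valid JSON object as a whole:
    each non-empty line is an independent action dictionary."""
    with open(jsonl_path) as f:
        return [json.loads(line) for line in f if line.strip()]
```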
diff --git a/docs/doc/925053b3-31c7-43ea-82f6-f2dce9a7ae76.json b/docs/doc/925053b3-31c7-43ea-82f6-f2dce9a7ae76.json
new file mode 100644
index 0000000..c3b4358
--- /dev/null
+++ b/docs/doc/925053b3-31c7-43ea-82f6-f2dce9a7ae76.json
@@ -0,0 +1,10 @@
+{
+ "summary": "Installs necessary libraries: PyTorch, Gym, attrs, and OpenCV Python.",
+ "details": [
+ {
+ "comment": "Installs necessary libraries: PyTorch, Gym, attrs, and OpenCV Python.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/requirements.txt\":0-3",
+ "content": "torch==1.9.0\ngym3\nattrs\nopencv-python"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/docs/doc/a2d1b2f8-7ef8-401b-bcd6-d13ace4d050a.json b/docs/doc/a2d1b2f8-7ef8-401b-bcd6-d13ace4d050a.json
new file mode 100644
index 0000000..b698458
--- /dev/null
+++ b/docs/doc/a2d1b2f8-7ef8-401b-bcd6-d13ace4d050a.json
@@ -0,0 +1,45 @@
+{
+ "summary": "The function develops a Masked Attention mechanism for time series data, incorporating parameters and considerations such as input size and mask type, and initializes an object for the masked attention based on these parameters. It defines a Masked Attention class with methods for state initialization, forward propagation, and handling causal masking, returning output and state information.",
+ "details": [
+ {
+ "comment": "This function returns a band diagonal mask for time series data, ensuring the attention is causal and limited to a specific maximum length. The mask is created based on the number of rows (frames receiving gradient) and columns (total frames including past context).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":0-30",
+ "content": "import functools\nimport torch as th\nfrom torch import nn\nimport lib.xf as xf\nfrom lib.minecraft_util import store_args\nfrom lib.tree_util import tree_map\n@functools.lru_cache()\ndef get_band_diagonal_mask(t: int, T: int, maxlen: int, batchsize: int, device: th.device) -> th.Tensor:\n \"\"\"Returns a band diagonal mask which is causal (upper triangle is masked)\n and such that any frame can only view up to maxlen total past frames\n including the current frame.\n Example Masks: Here 0 means that frame is masked and we mask it by adding a huge number to the attention logits (see orc.xf)\n t = 3, T = 3, maxlen = 3\n T\n t 1 0 0 | mask out T > t\n 1 1 0 |\n 1 1 1 |\n t = 3, T = 6, maxlen = 3\n t 0 1 1 1 0 0 | mask out T > t\n 0 0 1 1 1 0 |\n 0 0 0 1 1 1 |\n Args:\n t: number of rows (presumably number of frames recieving gradient)\n T: number of cols (presumably t + past context that isn't being gradient updated)\n maxlen: maximum number of frames (including current frame) any frame can attend to"
+ },
+ {
+ "comment": "This function takes the masked_attention function from Video-Pre-Training/lib/masked_attention.py and generates a Boolean mask of shape (batchsize, t, T) based on the given parameters. The mask will have the upper triangle and lower triangle (if maxlen is not None) masked out. The get_mask function takes additional parameters and returns a band diagonal mask that respects the masking past states if first_b11 is True, by zeros any past context.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":31-53",
+ "content": " batchsize: number of masks to return\n device: torch device to place mask on\n Returns:\n Boolean mask of shape (batchsize, t, T)\n \"\"\"\n m = th.ones(t, T, dtype=bool)\n m.tril_(T - t) # Mask out upper triangle\n if maxlen is not None and maxlen < T: # Mask out lower triangle\n m.triu_(T - t - maxlen + 1)\n m_btT = m[None].repeat_interleave(batchsize, dim=0)\n m_btT = m_btT.to(device=device)\n return m_btT\ndef get_mask(first_b11: th.Tensor, state_mask: th.Tensor, t: int, T: int, maxlen: int, heads: int, device) -> th.Tensor:\n \"\"\"Returns a band diagonal mask that respects masking past states (columns 0:T-t inclusive)\n if first_b11 is True. See get_band_diagonal_mask for how the base mask is computed.\n This function takes that mask and first zeros out any past context if first_b11 is True.\n Say our context is in chunks of length t (so here T = 4t). We see that in the second batch we recieved first=True\n context t t t t\n first F T F F"
+ },
+ {
+ "comment": "This function receives various inputs including `first_b11`, `state_mask`, `t`, `T`, `maxlen`, `heads`, and `device`. It will return a Boolean mask of shape (batchsize * heads, t, T) and an updated state_mask. The purpose of this function is to update the state_mask based on the given inputs for the masked attention mechanism.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":54-72",
+ "content": " Now, given this the mask should mask out anything prior to T < t; however since we don't have access to the past first_b11's\n we need to keep a state of the mask at those past timesteps. This is what state_mask is.\n In particular state_mask is a [b, t, T - t] mask matrix that contains the mask for the past T - t frames.\n Args: (See get_band_diagonal_mask for remaining args)\n first_b11: boolean tensor with shape [batchsize, 1, 1] indicating if the first timestep for each batch element had first=True\n state_mask: mask tensor of shape [b, t, T - t]\n t: number of mask rows (presumably number of frames for which we take gradient)\n T: number of mask columns (t + the number of past frames we keep in context)\n maxlen: actual context length\n heads: number of attention heads\n device: torch device\n Returns:\n m_btT: Boolean mask of shape (batchsize * heads, t, T)\n state_mask: updated state_mask\n \"\"\"\n b = first_b11.shape[0]"
+ },
+ {
+ "comment": "This code is creating a mask for self-attention in transformer layers. It ensures that frames from previous episodes are not considered in the attention calculation for each episode. The mask is generated based on the \"first\" flag, which indicates if it's the first timestep of each batch, and the state_mask to exclude past frames.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":74-101",
+ "content": " if state_mask is None:\n state_mask = th.zeros((b, 1, T - t), dtype=bool, device=device)\n m_btT = get_band_diagonal_mask(t, T, maxlen, b, device).clone() # Should be shape B, t, T\n not_first = ~first_b11.to(device=device)\n m_btT[:, :, :-t] &= not_first # Zero out anything in the past if first is true\n m_btT[:, :, :-t] &= state_mask\n m_bhtT = m_btT[:, None].repeat_interleave(heads, dim=1)\n m_btT = m_bhtT.reshape((b * heads), t, T)\n # Update state_mask such that it reflects the most recent first\n state_mask = th.cat(\n [\n state_mask[:, :, t:] & not_first,\n th.ones((b, 1, min(t, T - t)), dtype=bool, device=device),\n ],\n dim=-1,\n )\n return m_btT, state_mask\nclass MaskedAttention(nn.Module):\n \"\"\"\n Transformer self-attention layer that removes frames from previous episodes from the hidden state under certain constraints.\n The constraints are:\n - The \"first\" flag can only be true for the first timestep of each batch. An assert will fire if other timesteps have first = True."
+ },
+ {
+ "comment": "The code is describing the parameters and considerations of a masked attention mechanism. The input size, memory size, number of heads, timesteps, and mask are explained. The memory size allows attending to both inner state frames and batch frames, while the mask option handles potential imbalances between the first and last frames' attending capabilities.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":103-112",
+ "content": " input_size: The dimension of the input (which also happens to be the size of the output)\n memory_size: The number of frames to keep in the inner state. Note that when attending, we will be able to attend\n to both the frames in the inner state (which presumably won't have gradients anymore) and the frames\n in the batch. \"mask\" for some additional considerations on this.\n heads: The number of attention heads to use. Note that we will split the input into this number of heads, so\n input_size needs to be divisible by heads.\n timesteps: number of timesteps with which we'll be taking gradient\n mask: Can be \"none\" or \"clipped_causal\". \"clipped_causal\" is a normal causal mask but solves the following minor problem:\n if you have a state of length 128 and a batch of 128 frames, then the first frame of your batch will be able to\n attend to 128 previous frames, but the last one will be able to attend to 255 previous frames. In this example,"
+ },
+ {
+ "comment": "The function initializes an object for masked attention. It takes in parameters such as input size, memory size, number of heads, timesteps, and a mask option ('clipped_causal' or 'none'). The maximum length is calculated based on the memory size and timesteps. If the mask option is set to 'none', the mask parameter is set to None. An All2All object for attention is created with heads, maxlen, and the mask value. Finally, a SelfAttentionLayer object is initialized with input size, the All2All attention object, and other parameters such as scale, relattn, and cache_keep_len set accordingly.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":113-146",
+ "content": " \"clipped_causal\" will make it so that the last frame can only attend to 128 previous frames, so that there is no\n bias coming from the position in the batch. None simply allows you to attend to any frame in the state + batch,\n which means you can also attend to future frames.\n \"\"\"\n @store_args\n def __init__(\n self,\n input_size,\n memory_size: int,\n heads: int,\n timesteps: int,\n mask: str = \"clipped_causal\",\n init_scale=1,\n norm=\"none\",\n log_scope=\"sa\",\n use_muP_factor=False,\n ):\n super().__init__()\n assert mask in {\"none\", \"clipped_causal\"}\n assert memory_size >= 0\n self.maxlen = memory_size - timesteps\n if mask == \"none\":\n mask = None\n self.orc_attn = xf.All2All(heads, self.maxlen, mask=mask is not None)\n self.orc_block = xf.SelfAttentionLayer(\n input_size,\n self.orc_attn,\n scale=init_scale,\n relattn=True,\n cache_keep_len=self.maxlen,"
+ },
+ {
+ "comment": "This code defines a class for Masked Attention, which has methods for initializing the state, forward propagation of a single layer, and defining the mask type. The initial_state method returns the initial state mask (None) and the initial state of the transformer with keys and queries zeros out. The forward method performs forward propagation of a single layer using the input, first_bt, and state as inputs. If the mask type is \"clipped_causal\", it applies a specific mask to the input.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":147-172",
+ "content": " norm=norm,\n log_scope=log_scope,\n use_muP_factor=use_muP_factor,\n )\n def initial_state(self, batchsize: int, device=None):\n \"\"\"Return the initial state mask (None) and the initial state of the transformer (zerod out keys and queries)\"\"\"\n state = self.orc_block.initial_state(batchsize, initial_T=self.maxlen)\n state_mask = None\n if device is not None:\n state = tree_map(lambda x: x.to(device), state)\n return state_mask, state\n def forward(self, input_bte, first_bt, state):\n \"\"\"Forward propagation of a single layer\"\"\"\n state_mask, xf_state = state\n t = first_bt.shape[1]\n if self.mask == \"clipped_causal\":\n new_mask, state_mask = get_mask(\n first_b11=first_bt[:, [[0]]],\n state_mask=state_mask,\n t=t,\n T=t + self.maxlen,\n maxlen=self.maxlen,\n heads=self.heads,\n device=input_bte.device,"
+ },
+ {
+ "comment": "This code is defining a method in the class and returning comments for the code block. The method seems to be related to attention mechanism, where it applies masking to the input and returns output and state information. The log keys are defined as well for further logging purposes.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/masked_attention.py\":173-181",
+ "content": " )\n self.orc_block.attn.mask = new_mask\n output, xf_state = self.orc_block(input_bte, xf_state)\n return output, (state_mask, xf_state)\n def get_log_keys(self):\n # These are logged in xf.SelfAttentionLayer\n return [f\"activation_{stat}/{self.log_scope}/{k}\" for k in [\"K\", \"Q\", \"V\", \"A\", \"Aproj\"] for stat in [\"mean\", \"std\"]]"
+ }
+ ]
+}
\ No newline at end of file
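The masked-attention comments above lean on get_band_diagonal_mask, whose body is quoted in the cached content. Below is a minimal standalone sketch (mine, not repository code) that mirrors that quoted logic and reproduces the t = 3, T = 6, maxlen = 3 example from the docstring:

```python
import torch as th

def band_diagonal_mask(t: int, T: int, maxlen: int) -> th.Tensor:
    """Causal band-diagonal mask: frame i may attend only to past frames,
    and to at most `maxlen` of them (current frame included)."""
    m = th.ones(t, T, dtype=th.bool)
    m.tril_(T - t)                       # mask out the "future" (upper triangle)
    if maxlen is not None and maxlen < T:
        m.triu_(T - t - maxlen + 1)      # limit context to the last maxlen frames
    return m

print(band_diagonal_mask(3, 6, 3).int())
# tensor([[0, 1, 1, 1, 0, 0],
#         [0, 0, 1, 1, 1, 0],
#         [0, 0, 0, 1, 1, 1]], dtype=torch.int32)
```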
diff --git a/docs/doc/a9247fa2-5685-42aa-9868-427cb1469509.json b/docs/doc/a9247fa2-5685-42aa-9868-427cb1469509.json
new file mode 100644
index 0000000..dc1c958
--- /dev/null
+++ b/docs/doc/a9247fa2-5685-42aa-9868-427cb1469509.json
@@ -0,0 +1,50 @@
+{
+ "summary": "The code includes utility functions for manipulating data structures, defines partial applications and safe mapping functions for multiple lists, and registers different data types and their conversion functions for serialization using the PyTree API.",
+ "details": [
+ {
+ "comment": "This code block contains utility functions for manipulating dictionaries, tuples and lists. It defines two functions: unzip2 and partial. Unzip2 takes a list of pairs (x, y) and returns the corresponding x and y as separate tuples. Partial is a wrapper function that creates a partial application of another function with specified arguments or keyword arguments.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":0-34",
+ "content": "# Copyright 2018 Google LLC\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n# https://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# Copied this from jax, made it self-contained\n# Currently just used for improved_checkpoint\nimport collections\nimport functools\nimport itertools as it\nfrom collections.abc import Collection\nfrom typing import Dict, List, Optional\ndef unzip2(xys):\n xs = []\n ys = []\n for x, y in xys:\n xs.append(x)\n ys.append(y)\n return tuple(xs), tuple(ys)\ndef partial(fun, *args, **kwargs):\n wrapped = functools.partial(fun, *args, **kwargs)"
+ },
+ {
+ "comment": "This code contains three functions:\n1. `safe_zip()` - creates a list of tuples from multiple lists, asserting that all lists have the same length.\n2. `safe_map()` - applies a function to each element in a list (or multiple lists), asserting that all lists have the same length.\n3. `tree_map()` - maps a function over a pytree and returns a new pytree with the same structure, but with values at leaves given by applying the function to corresponding leaf values in the original tree.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":35-69",
+ "content": " functools.update_wrapper(wrapped, fun)\n wrapped._bound_args = args # pylint: disable=protected-access\n return wrapped\ndef safe_zip(*args: Collection) -> List[tuple]:\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, \"length mismatch: {}\".format(list(map(len, args)))\n return list(zip(*args))\ndef safe_map(f, *args):\n args = list(map(list, args))\n n = len(args[0])\n for arg in args[1:]:\n assert len(arg) == n, \"length mismatch: {}\".format(list(map(len, args)))\n return list(map(f, *args))\ndef tree_map(f, tree, treat_as_leaves: Optional[List] = None):\n \"\"\"Map a function over a pytree to produce a new pytree.\n Args:\n f: function to be applied at each leaf.\n tree: a pytree to be mapped over.\n Returns:\n A new pytree with the same structure as `tree` but with the value at each\n leaf given by `f(x)` where `x` is the value at the corresponding leaf in\n `tree`.\n \"\"\"\n if treat_as_leaves is None:\n treat_as_leaves = []\n node_type = node_types.get(type(tree))"
+ },
+ {
+ "comment": "This function applies a multi-input function to the leaves of a pytree and its sibling pytrees, returning a new pytree with values determined by `f(x, *xs)` where `x` is the value at the corresponding leaf in the original tree and `xs` is the tuple of values at corresponding leaves in the sibling trees. If the node type allows for further processing, it iterates over the children and applies the function to each child and its corresponding siblings before returning a new pytree.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":70-92",
+ "content": " if node_type and type(tree) not in treat_as_leaves:\n children, node_spec = node_type.to_iterable(tree)\n new_children = [tree_map(f, child, treat_as_leaves) for child in children]\n return node_type.from_iterable(node_spec, new_children)\n else:\n return f(tree)\ndef tree_multimap(f, tree, *rest, treat_as_leaves: Optional[List] = None):\n \"\"\"Map a multi-input function over pytree args to produce a new pytree.\n Args:\n f: function that takes `1 + len(rest)` arguments, to be applied at the\n corresponding leaves of the pytrees.\n tree: a pytree to be mapped over, with each leaf providing the first\n positional argument to `f`.\n *rest: a tuple of pytrees, each with the same structure as `tree`.\n Returns:\n A new pytree with the same structure as `tree` but with the value at each\n leaf given by `f(x, *xs)` where `x` is the value at the corresponding leaf\n in `tree` and `xs` is the tuple of values at corresponding leaves in `rest`.\n \"\"\""
+ },
+ {
+ "comment": "This code is determining the appropriate node type for a given tree and iterating through trees to ensure they match. It then applies a function f to each tree, handling different cases based on whether the node type is specified or not.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":94-118",
+ "content": " if treat_as_leaves is None:\n treat_as_leaves = []\n node_type = node_types.get(type(tree))\n if node_type and type(tree) not in treat_as_leaves:\n children, node_spec = node_type.to_iterable(tree)\n all_children = [children]\n for other_tree in rest:\n other_children, other_node_data = node_type.to_iterable(other_tree)\n if other_node_data != node_spec:\n raise TypeError(\"Mismatch: {} != {}\".format(other_node_data, node_spec))\n all_children.append(other_children)\n new_children = [tree_multimap(f, *xs, treat_as_leaves=treat_as_leaves) for xs in zip(*all_children)]\n return node_type.from_iterable(node_spec, new_children)\n else:\n return f(tree, *rest)\ndef prefix_multimap(f, treedef, tree, *rest):\n \"\"\"Like tree_multimap but only maps down through a tree prefix.\"\"\"\n if isinstance(treedef, PyLeaf):\n return f(tree, *rest)\n else:\n node_type = node_types.get(type(tree))\n if node_type != treedef.node_type:"
+ },
+ {
+ "comment": "Code is iterating over a tree structure and checking if the nodes match in terms of node type, node data, and number of children. If any mismatches are found, a TypeError is raised. The function walks through the tree recursively and applies functions to both leaf and non-leaf nodes based on their types. Optional treat_as_leaves list specifies which types should be treated as leaves.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":119-140",
+ "content": " raise TypeError(\"Mismatch: {} != {}\".format(treedef.node_type, node_type))\n children, node_data = node_type.to_iterable(tree)\n if node_data != treedef.node_data:\n raise TypeError(\"Mismatch: {} != {}\".format(treedef.node_data, node_data))\n all_children = [children]\n for other_tree in rest:\n other_children, other_node_data = node_type.to_iterable(other_tree)\n if other_node_data != node_data:\n raise TypeError(\"Mismatch: {} != {}\".format(other_node_data, node_data))\n all_children.append(other_children)\n all_children = zip(*all_children)\n new_children = [prefix_multimap(f, td, *xs) for td, xs in zip(treedef.children, all_children)]\n return node_type.from_iterable(node_data, new_children)\ndef walk_pytree(f_node, f_leaf, tree, treat_as_leaves: Optional[List] = None):\n node_type = node_types.get(type(tree))\n if treat_as_leaves is None:\n treat_as_leaves = []\n if node_type and type(tree) not in treat_as_leaves:"
+ },
+ {
+ "comment": "Function `to_iterable` splits node type and specifications, walks tree using function `walk_pytree`, unzips the result into procedure children and child specifications, creates a `PyTreeDef` object, and returns the processed children and tree definition.\n`build_tree` recursively builds the tree by calling itself on each child of the current node and constructs the final node using the node type and data.\n`_tree_unflatten` recursively unflattens the tree by calling itself on each child of the current node.\nFunction `_num_leaves` returns 1 if the treedef is a leaf, otherwise it sums the number of leaves in each child.\nFunction `_nested_treedef` takes two node types and returns a nested tree definition.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":141-170",
+ "content": " children, node_spec = node_type.to_iterable(tree)\n proc_children, child_specs = unzip2([walk_pytree(f_node, f_leaf, child, treat_as_leaves) for child in children])\n tree_def = PyTreeDef(node_type, node_spec, child_specs)\n return f_node(proc_children), tree_def\n else:\n return f_leaf(tree), PyLeaf()\ndef build_tree(treedef, xs):\n if isinstance(treedef, PyLeaf):\n return xs\n else:\n # We use 'iter' for clearer error messages\n children = safe_map(build_tree, iter(treedef.children), iter(xs))\n return treedef.node_type.from_iterable(treedef.node_data, children)\ndef _tree_unflatten(xs, treedef):\n if isinstance(treedef, PyLeaf):\n return next(xs)\n else:\n children = safe_map(partial(_tree_unflatten, xs), treedef.children)\n return treedef.node_type.from_iterable(treedef.node_data, children)\ndef _num_leaves(treedef):\n return 1 if isinstance(treedef, PyLeaf) else sum(safe_map(_num_leaves, treedef.children))\ndef _nested_treedef(inner, outer):"
+ },
+ {
+ "comment": "The code defines a `PyTreeDef` class representing nodes in a tree structure. It checks if the input is a leaf node, then creates children objects using `_nested_treedef` and `safe_map`, and returns an instance of `PyTreeDef` with the given node type, data, and children. The class also provides a custom `__repr__` method for string representation, a `__hash__` method for hashability, and an `__eq__` method for equality comparison.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":171-200",
+ "content": " # just used in tree_transpose error checking\n if isinstance(outer, PyLeaf):\n return inner\n else:\n children = safe_map(partial(_nested_treedef, inner), outer.children)\n return PyTreeDef(outer.node_type, outer.node_data, tuple(children))\nclass PyTreeDef(object):\n def __init__(self, node_type, node_data, children):\n self.node_type = node_type\n self.node_data = node_data\n self.children = children\n def __repr__(self):\n if self.node_data is None:\n data_repr = \"\"\n else:\n data_repr = \"[{}]\".format(self.node_data)\n return \"PyTree({}{}, [{}])\".format(self.node_type.name, data_repr, \",\".join(safe_map(repr, self.children)))\n def __hash__(self):\n return hash((self.node_type, self.node_data, tuple(self.children)))\n def __eq__(self, other):\n if isinstance(other, PyLeaf):\n return False\n else:\n return self.node_type == other.node_type and self.node_data == other.node_data and self.children == other.children"
+ },
+ {
+ "comment": "This code defines a class hierarchy for representing and serializing objects in a tree-like structure. It includes methods for converting different types of collections (tuples, lists, dictionaries) to iterable and back. The `NodeType` class stores information about the type and its conversion rules, which are registered using the `register_pytree_node` function.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":202-255",
+ "content": " def __ne__(self, other):\n return not self == other\nclass PyLeaf(object):\n def __repr__(self):\n return \"*\"\n def __eq__(self, other):\n return isinstance(other, PyLeaf)\nclass NodeType(object):\n def __init__(self, name, to_iterable, from_iterable):\n self.name = name\n self.to_iterable = to_iterable\n self.from_iterable = from_iterable\nnode_types: Dict[type, NodeType] = {}\ndef register_pytree_node(py_type, to_iterable, from_iterable):\n assert py_type not in node_types\n node_types[py_type] = NodeType(str(py_type), to_iterable, from_iterable)\ndef tuple_to_iterable(xs):\n return xs, None\ndef tuple_from_iterable(_keys, xs):\n return tuple(xs)\ndef list_to_iterable(xs):\n return tuple(xs), None\ndef list_from_iterable(_keys, xs):\n return list(xs)\ndef dict_to_iterable(xs):\n keys = tuple(sorted(xs.keys()))\n return tuple(map(xs.get, keys)), keys\ndef dict_from_iterable(keys, xs):\n return dict(safe_zip(keys, xs))\ndef ordered_dict_from_iterable(keys, xs):\n return collections.OrderedDict(safe_zip(keys, xs))"
+ },
+ {
+ "comment": "This code registers different data types and their conversion functions for serialization using the PyTree API. It handles tuples, lists, dictionaries (including OrderedDict), collections.defaultdict, and None type. The default_dict_to_iterable, default_dict_from_iterable, none_to_iterable, and none_from_iterable functions handle the conversion to and from iterables for these data types.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/tree_util.py\":258-279",
+ "content": "def default_dict_to_iterable(xs):\n return (tuple(xs.values()), (xs.default_factory, tuple(xs.keys())))\ndef default_dict_from_iterable(keys, xs):\n return collections.defaultdict(keys[0], safe_zip(keys[1], xs))\ndef none_to_iterable(_xs):\n return (), None\ndef none_from_iterable(_keys, _xs):\n return None\nregister_pytree_node(tuple, tuple_to_iterable, tuple_from_iterable)\nregister_pytree_node(list, list_to_iterable, list_from_iterable)\nregister_pytree_node(dict, dict_to_iterable, dict_from_iterable)\nregister_pytree_node(collections.OrderedDict, dict_to_iterable, ordered_dict_from_iterable)\nregister_pytree_node(collections.defaultdict, default_dict_to_iterable, default_dict_from_iterable)\nregister_pytree_node(type(None), none_to_iterable, none_from_iterable)"
+ }
+ ]
+}
\ No newline at end of file
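The tree_util entries above document the repository's self-contained pytree helpers. As a short usage sketch (my own example, assuming the repository's `lib` package is importable, the same import style masked_attention.py uses), tree_map applies a function to every leaf while preserving the nested container structure, which is how arbitrarily nested states can be moved between devices or dtypes:

```python
import torch as th
from lib.tree_util import tree_map  # same import style as lib/masked_attention.py

# A nested state: dicts and lists are registered pytree node types,
# so only the tensor leaves are transformed.
state = {"keys": th.zeros(2, 4), "values": [th.ones(3), th.ones(5)]}

state_fp16 = tree_map(lambda x: x.half(), state)

print(type(state_fp16), state_fp16["values"][0].dtype)  # <class 'dict'> torch.float16
```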
diff --git a/docs/doc/d2d35480-1f74-4e33-8d40-424ce68f773c.json b/docs/doc/d2d35480-1f74-4e33-8d40-424ce68f773c.json
new file mode 100644
index 0000000..6e656b6
--- /dev/null
+++ b/docs/doc/d2d35480-1f74-4e33-8d40-424ce68f773c.json
@@ -0,0 +1,25 @@
+{
+ "summary": "IDMAgent is a Minecraft action predictor using the IDM model, featuring functions for initializing, loading weights, resetting state, and processing video frames. It converts policy output to MineRL format for agent state prediction.",
+ "details": [
+ {
+ "comment": "IDMAgent is a class representing an agent that uses the inverse dynamics model (IDM) to predict Minecraft player actions in videos. It has an action mapper and is initialized with idm_net_kwargs, pi_head_kwargs, and device (default device type if None).",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/inverse_dynamics_model.py\":0-32",
+ "content": "import numpy as np\nimport torch as th\nimport cv2\nfrom gym3.types import DictType\nfrom gym import spaces\nfrom lib.action_mapping import CameraHierarchicalMapping, IDMActionMapping\nfrom lib.actions import ActionTransformer\nfrom lib.policy import InverseActionPolicy\nfrom lib.torch_util import default_device_type, set_default_torch_device\nfrom agent import resize_image, AGENT_RESOLUTION\nACTION_TRANSFORMER_KWARGS = dict(\n camera_binsize=2,\n camera_maxval=10,\n camera_mu=10,\n camera_quantization_scheme=\"mu_law\",\n)\nclass IDMAgent:\n \"\"\"\n Sugarcoating on the inverse dynamics model (IDM) used to predict actions Minecraft players take in videos.\n Functionally same as MineRLAgent.\n \"\"\"\n def __init__(self, idm_net_kwargs, pi_head_kwargs, device=None):\n if device is None:\n device = default_device_type()\n self.device = th.device(device)\n # Set the default torch device for underlying code as well\n set_default_torch_device(self.device)\n self.action_mapper = IDMActionMapping(n_camera_bins=11)"
+ },
+ {
+ "comment": "Function: __init__\n- Initializes the agent with specified parameters and loads initial weights.\n\nFunction: load_weights\n- Loads model weights from a path and resets the hidden state of the agent.\n\nFunction: reset\n- Resets the agent to its initial state by setting the hidden state to the result of the policy's initial_state method with an argument of 1.\n\nFunction:_video_obs_to_agent\n- Takes a list of video frames, resizes them to AGENT_RESOLUTION, and returns the processed images for the agent to use.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/inverse_dynamics_model.py\":33-55",
+ "content": " action_space = self.action_mapper.get_action_space_update()\n action_space = DictType(**action_space)\n self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)\n idm_policy_kwargs = dict(idm_net_kwargs=idm_net_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)\n self.policy = InverseActionPolicy(**idm_policy_kwargs).to(device)\n self.hidden_state = self.policy.initial_state(1)\n self._dummy_first = th.from_numpy(np.array((False,))).to(device)\n def load_weights(self, path):\n \"\"\"Load model weights from a path, and reset hidden state\"\"\"\n self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)\n self.reset()\n def reset(self):\n \"\"\"Reset agent to initial state (i.e., reset hidden state)\"\"\"\n self.hidden_state = self.policy.initial_state(1)\n def _video_obs_to_agent(self, video_frames):\n imgs = [resize_image(frame, AGENT_RESOLUTION) for frame in video_frames]\n # Add time and batch dim"
+ },
+ {
+ "comment": "Code snippet:\n```python\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # Manual conversion to numpy is important.\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n```\nComment: Converts policy output to MineRL action format using manual numpy conversion",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/inverse_dynamics_model.py\":56-78",
+ "content": " imgs = np.stack(imgs)[None]\n agent_input = {\"img\": th.from_numpy(imgs).to(self.device)}\n return agent_input\n def _agent_action_to_env(self, agent_action):\n \"\"\"Turn output from policy into action for MineRL\"\"\"\n # This is quite important step (for some reason).\n # For the sake of your sanity, remember to do this step (manual conversion to numpy)\n # before proceeding. Otherwise, your agent might be a little derp.\n action = {\n \"buttons\": agent_action[\"buttons\"].cpu().numpy(),\n \"camera\": agent_action[\"camera\"].cpu().numpy()\n }\n minerl_action = self.action_mapper.to_factored(action)\n minerl_action_transformed = self.action_transformer.policy2env(minerl_action)\n return minerl_action_transformed\n def predict_actions(self, video_frames):\n \"\"\"\n Predict actions for a sequence of frames.\n `video_frames` should be of shape (N, H, W, C).\n Returns MineRL action dict, where each action head"
+ },
+ {
+ "comment": "This function takes video frames as input, converts them to agent input, and uses the policy model to predict actions. It also maintains an internal hidden state for tracking the agent's state and can be reset using `reset()`.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/inverse_dynamics_model.py\":79-94",
+ "content": " has shape (N, ...).\n Agent's hidden state is tracked internally. To reset it,\n call `reset()`.\n \"\"\"\n agent_input = self._video_obs_to_agent(video_frames)\n # The \"first\" argument could be used to reset tell episode\n # boundaries, but we are only using this for predicting (for now),\n # so we do not hassle with it yet.\n dummy_first = th.zeros((video_frames.shape[0], 1)).to(self.device)\n predicted_actions, self.hidden_state, _ = self.policy.predict(\n agent_input, first=dummy_first, state_in=self.hidden_state,\n deterministic=True\n )\n predicted_minerl_action = self._agent_action_to_env(predicted_actions)\n return predicted_minerl_action"
+ }
+ ]
+}
\ No newline at end of file
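As a quick orientation for the IDMAgent interface documented above, here is a hypothetical usage sketch. It assumes the repository's inverse_dynamics_model module is importable and that idm_net_kwargs and pi_head_kwargs have already been extracted from a pickled '.model' file, as the repository's loading scripts do; the function name and arguments are illustrative only.

```python
# Hypothetical usage sketch for IDMAgent; not part of the repository.
from inverse_dynamics_model import IDMAgent

def predict_from_frames(idm_net_kwargs, pi_head_kwargs, weights_path, frames):
    """frames: np.ndarray of shape (N, H, W, C) with uint8 video frames."""
    agent = IDMAgent(idm_net_kwargs=idm_net_kwargs, pi_head_kwargs=pi_head_kwargs)
    agent.load_weights(weights_path)          # load weights and reset hidden state
    return agent.predict_actions(frames)      # MineRL action dict, each head shaped (N, ...)
```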
diff --git a/docs/doc/db701825-637c-44fd-b41d-84bfe67688a6.json b/docs/doc/db701825-637c-44fd-b41d-84bfe67688a6.json
new file mode 100644
index 0000000..730ecdc
--- /dev/null
+++ b/docs/doc/db701825-637c-44fd-b41d-84bfe67688a6.json
@@ -0,0 +1,80 @@
+{
+ "summary": "The code defines an attention mechanism with preprocessing methods and StridedAttn class, as well as SelfAttentionLayer and residual MLP layers for transformer models, including operations like concatenation, reshaping, and activation functions.",
+ "details": [
+ {
+ "comment": "The code snippet defines a function named \"attention\" which performs softmax(Q*K)*V operation. It takes query (Q), keys (K), and values (V) as input, along with the data type, mask, additional batch-to-time matrix (extra_btT), maximum length (maxlen), and a flag to check sentinel values (check_sentinel). The function checks if the data types match and then proceeds to perform the softmax operation.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":0-42",
+ "content": "\"\"\"\nImplementation of transformer and reshaping-based sparse transformer\n\"\"\"\nimport functools\nimport math\nimport torch as th\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib import misc, mlp\nfrom lib import torch_util as tu\nfrom lib import util\nSENTINEL = 0.1337\ndef attention(\n Q_bte,\n K_bTe,\n V_bTe,\n dtype,\n mask=True,\n extra_btT=None,\n maxlen=None,\n check_sentinel=False,\n use_muP_factor=False,\n):\n \"\"\"\n performs softmax(Q*K)*V operation\n t : output (write) time axis, possibly size=1 for just the last timestep\n T : input (read) time axis\n t < T is OK\n 'check_sentinel' is used when you want to make it impossible to attend to certain keys.\n All keys where every value is equal to the constant SENTINEL will be ignored.\n Currently this is only used by StridedAttn.\n \"\"\"\n assert Q_bte.dtype == K_bTe.dtype == dtype, f\"{Q_bte.dtype}, {K_bTe.dtype}, {dtype} must all match\"\n e = Q_bte.shape[2]\n if check_sentinel:\n invalid = (K_bTe == SENTINEL).int().sum(dim=-1) == e"
+ },
+ {
+ "comment": "The code calculates the logits for a multi-head attention mechanism, taking into account masking and optional extra inputs. It applies the necessary transformations to the input tensors and performs the dot product between queries (Q) and keys (K). The result is then normalized using softmax function to obtain the weights (W_btT) for the attention process.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":43-67",
+ "content": " invalid = misc.reshape(invalid, \"b, T\", \"b, 1, T\")\n if isinstance(mask, th.Tensor):\n bias = (~mask).float() * -1e9\n elif mask:\n bias = get_attn_bias_cached(Q_bte.shape[1], K_bTe.shape[1], maxlen=maxlen, device=Q_bte.device, dtype=th.float32)\n else:\n bias = Q_bte.new_zeros((), dtype=th.float32)\n if extra_btT is not None:\n bias = bias + extra_btT\n # Equivalent to bias + (1 / math.sqrt(e)) * th.einsum(\"bte,bpe->btp\", Q_bte, K_bte)\n # but faster:\n logit_btT = th.baddbmm(\n bias,\n Q_bte.float(),\n K_bTe.float().transpose(-1, -2),\n alpha=(1 / e) if use_muP_factor else (1 / math.sqrt(e)),\n )\n if check_sentinel:\n logit_btT = logit_btT - 1e9 * invalid.float()\n W_btT = th.softmax(logit_btT, dim=2).to(dtype)\n if callable(V_bTe):\n # This is used by the sharded video model to defer waiting on\n # the broadcast of the values until they're needed\n V_bTe = V_bTe()\n # th.einsum only lets you use lowercase letters, so 'p' for 'past'"
+ },
+ {
+ "comment": "This code defines an attention mechanism class and a function to split input into heads. The attention mechanism is initialized with parameters such as number of heads, maximum length, and mask. The \"preproc_qkv\" and \"preproc_r\" methods for preprocessing Q, K, V, and R are not implemented yet. The code also includes the \"split_heads\" function to split input into multiple heads.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":68-106",
+ "content": " # means 'T'\n A_bte = th.einsum(\"btp,bpe->bte\", W_btT, V_bTe)\n return A_bte\nclass Attn:\n \"\"\"\n Defines an attention mechanism\n All the mechanisms here can be defined by two operations:\n 1. preprocessing Q,K,V,R[=relative attention query]\n to move axes from embedding dimension to\n batch dimension, and possibly doing shifts.\n 2. postprocessing the final result to move axes back to embedding\n axis.\n \"\"\"\n def __init__(self, mask, maxlen):\n self.mask = mask\n self.maxlen = maxlen\n def preproc_qkv(self, Q_bte, K_bte, V_bte):\n raise NotImplementedError\n def preproc_r(self, R_btn):\n raise NotImplementedError\ndef split_heads(x_bte, h):\n b, t, e = x_bte.shape\n assert e % h == 0, \"Embsize must be divisible by number of heads\"\n q = e // h\n x_bthq = x_bte.reshape((b, t, h, q))\n x_bhtq = misc.transpose(x_bthq, \"bthq\", \"bhtq\")\n x_Btq = x_bhtq.reshape((b * h, t, q))\n return x_Btq\nclass All2All(Attn):\n def __init__(self, nhead, maxlen, mask=True, head_dim=None):"
+ },
+ {
+ "comment": "This code initializes a class with optional nhead and head_dim arguments, and defines preproc_qkv and preproc_r functions to handle input shapes. It also includes a postproc_a function for reshaping the output shape. The _required_padding function checks if padding is needed for certain dimensions.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":107-137",
+ "content": " super().__init__(mask=mask, maxlen=maxlen)\n assert (nhead is None) != (head_dim is None), \"exactly one of nhead and head_dim must be specified\"\n self.h = nhead\n self.head_dim = head_dim\n def preproc_qkv(self, *xs):\n q = xs[0].shape[-1]\n for x in xs:\n assert x.shape[-1] == q, \"embedding dimensions do not match\"\n h = self.h or misc.exact_div(q, self.head_dim)\n postproc = functools.partial(self.postproc_a, h=h)\n return (postproc, *tuple(split_heads(x, h) for x in xs))\n def preproc_r(self, R_btn):\n _, ret = self.preproc_qkv(R_btn)\n return ret\n def postproc_a(self, A_Btq, h):\n B, t, q = A_Btq.shape\n b = B // h\n A_bhtq = A_Btq.reshape((b, h, t, q))\n A_bthq = misc.transpose(A_bhtq, \"bhtq\", \"bthq\")\n A_bte = A_bthq.reshape((b, t, h * q))\n return A_bte\ndef _required_padding(dim, target_div):\n if dim % target_div == 0:\n return 0\n else:\n return target_div - dim % target_div"
+ },
+ {
+ "comment": "This code defines a StridedAttn class which is a subclass of Attn. The __init__ method initializes the number of heads, stride, maximum length, and whether or not to use a mask. The _preproc method preprocesses input data by reshaping, padding if necessary, and defining undo operations for later use. It also checks that the query tensor length is divisible by the maximum length.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":140-162",
+ "content": "class StridedAttn(Attn):\n def __init__(self, nhead, stride, maxlen, mask=True):\n super().__init__(mask=mask, maxlen=maxlen)\n self.h = nhead\n self.stride = stride\n def _preproc(self, x, name, Q_t=None, Q_pad=None):\n x, undo = misc.reshape_undo(x, \"b, t*stride, e\", \"b, 1, t, stride*e\", stride=self.stride)\n if name == \"Q\":\n Q_pad = _required_padding(x.shape[2], self.maxlen)\n original_t = x.shape[2]\n x = F.pad(x, (0, 0, 0, Q_pad), value=SENTINEL)\n undo = misc.compose_undo(undo, lambda x: x[:, :, :original_t])\n if name == \"Q\":\n Q_t = x.shape[2]\n assert Q_t % self.maxlen == 0, f\"{Q_t} % {self.maxlen} != 0\"\n else:\n required_len = Q_t + self.maxlen\n if x.shape[2] < required_len:\n x = F.pad(x, (0, 0, required_len - x.shape[2], 0), value=SENTINEL)\n assert x.shape[2] >= required_len\n back = x[:, :, -Q_t - self.maxlen : -self.maxlen]\n front = x[:, :, -Q_t:]"
+ },
+ {
+ "comment": "This code block preprocesses input data for a deep learning model. It performs operations like concatenation, reshaping, and transposition to prepare the data in a suitable format for further processing. The code also includes padding operations to handle data with different dimensions.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":163-191",
+ "content": " x = th.cat([back, front], dim=1)\n _, _, t, _ = x.shape\n assert t == Q_t, f\"{t} != {Q_t}\"\n x, undo = misc.reshape_undo(\n x,\n \"b, pad_shift, t*maxlen, stride*h*q\",\n \"b, pad_shift, t, maxlen, stride, h, q\",\n maxlen=self.maxlen,\n h=self.h,\n stride=self.stride,\n undo=undo,\n )\n x, undo = misc.transpose_undo(x, \"bptmshq\", \"bthspmq\", undo=undo)\n x, undo = misc.reshape_undo(\n x,\n \"b, t, h, stride, pad_shift, maxlen, q\",\n \"b*t*h*stride, pad_shift*maxlen, q\",\n undo=undo,\n )\n if name == \"Q\":\n return x, undo, Q_t, Q_pad\n else:\n return x\n def preproc_qkv(self, Q_bte, K_bte, V_bte):\n pad = _required_padding(Q_bte.shape[1], self.stride)\n if pad:\n Q_bte = F.pad(Q_bte, (0, 0, 0, pad), value=SENTINEL)\n K_bte = F.pad(K_bte, (0, 0, 0, pad), value=SENTINEL) if K_bte is not None else None"
+ },
+ {
+ "comment": "This code performs preprocessing for query (Q), key (K), and value (V) tensors in a transformer model. If any of the tensors are None, they are padded with a sentinel value. The function returns preprocessing results including postprocessing operations (postproc) and prepared Q, K, and V tensors for training or inference.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":192-214",
+ "content": " V_bte = F.pad(V_bte, (0, 0, 0, pad), value=SENTINEL) if V_bte is not None else None\n undo = lambda x, pad=pad: x[:, :-pad]\n else:\n undo = None\n if K_bte is not None:\n pad = _required_padding(K_bte.shape[1], self.stride)\n if pad:\n K_bte = F.pad(K_bte, (0, 0, pad, 0), value=SENTINEL)\n V_bte = F.pad(V_bte, (0, 0, pad, 0), value=SENTINEL)\n assert Q_bte.shape[1] % self.stride == 0\n assert K_bte is None or K_bte.shape[1] % self.stride == 0\n assert V_bte is None or V_bte.shape[1] % self.stride == 0\n Q, postproc, Q_t, Q_pad = self._preproc(Q_bte, \"Q\")\n postproc = misc.compose_undo(undo, postproc)\n return (\n postproc,\n Q,\n self._preproc(K_bte, \"K\", Q_t=Q_t, Q_pad=Q_pad) if K_bte is not None else None,\n self._preproc(V_bte, \"V\", Q_t=Q_t, Q_pad=Q_pad) if V_bte is not None else None,\n )\n def preproc_r(self, R_bte):\n _, R, _, _ = self.preproc_qkv(R_bte, None, None)"
+ },
+ {
+ "comment": "This code defines an `AttentionLayerBase` class that inherits from `nn.Module`. It takes in several parameters such as `attn`, `scale`, `x_size`, `c_size`, `qk_size`, `v_size`, `dtype`, `relattn`, and `seqlens`. Inside the class, it initializes multiple layers using `MultiscaleLinear` with different scales and sizes based on the input parameters. These layers are used for query (Q), key (K), value (V) computations, and projection.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":215-253",
+ "content": " return R\nQ_SCALE = 0.1\nK_SCALE = 0.2\nV_SCALE = 1.0\nPROJ_SCALE = 1.0\nMLP0_SCALE = 1.0\nMLP1_SCALE = 1.0\nR_SCALE = 0.1\nB_SCALE = 0.2\nclass AttentionLayerBase(nn.Module):\n def __init__(\n self,\n *,\n attn,\n scale,\n x_size,\n c_size,\n qk_size,\n v_size,\n dtype,\n relattn=False,\n seqlens=None,\n separate=False,\n ):\n super().__init__()\n dtype = tu.parse_dtype(dtype)\n self.attn = attn\n self.x_size = x_size\n self.c_size = c_size\n s = math.sqrt(scale)\n separgs = dict(seqlens=seqlens, separate=separate)\n self.q_layer = MultiscaleLinear(x_size, qk_size, name=\"q\", scale=Q_SCALE, dtype=dtype, **separgs)\n self.k_layer = MultiscaleLinear(c_size, qk_size, name=\"k\", scale=K_SCALE, bias=False, dtype=dtype, **separgs)\n self.v_layer = MultiscaleLinear(c_size, v_size, name=\"v\", scale=V_SCALE * s, bias=False, dtype=dtype, **separgs)\n self.proj_layer = MultiscaleLinear(v_size, x_size, name=\"proj\", scale=PROJ_SCALE * s, dtype=dtype, **separgs)"
+ },
+ {
+ "comment": "This code defines a class called SelfAttentionLayer which inherits from AttentionLayerBase. It initializes the relattn attribute, checks if relattn is set, and then initializes r_layer and b_nd if relattn is true. The maxlen, dtype attributes are also initialized based on the input attn. Finally, a relattn_logits method is defined to compute the relative attention logits for the input X_bte and T. Additionally, there are two helper functions: relu, gelu, and none act as activation functions which can be applied to the output of the layer.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":254-290",
+ "content": " self.relattn = relattn\n maxlen = attn.maxlen\n assert maxlen > 0 or not attn.mask\n if self.relattn:\n nbasis = 10\n self.r_layer = tu.NormedLinear(x_size, nbasis * attn.h, scale=R_SCALE, dtype=dtype)\n self.b_nd = nn.Parameter(th.randn(nbasis, maxlen) * B_SCALE)\n self.maxlen = maxlen\n self.dtype = dtype\n def relattn_logits(self, X_bte, T):\n R_btn = self.r_layer(X_bte).float()\n R_btn = self.attn.preproc_r(R_btn)\n t = R_btn.shape[1]\n D_ntT = util.bandify(self.b_nd, t, T)\n extra_btT = th.einsum(\"btn,ntp->btp\", R_btn, D_ntT)\n return extra_btT\ndef quick_gelu(x):\n return x * th.sigmoid(1.702 * x)\ndef act(actname, x):\n if actname == \"relu\":\n return F.relu(x)\n elif actname == \"gelu\":\n return quick_gelu(x)\n elif actname == \"none\":\n return x\n else:\n raise NotImplementedError(actname)\nclass SelfAttentionLayer(AttentionLayerBase):\n \"\"\"\n Residual attention layer that takes a single tensor x and has it attend to itself"
+ },
+ {
+ "comment": "This code defines a class constructor for an Attention module. It initializes the object with various parameters and sets up some attributes like normalization layers and cache lengths.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":291-330",
+ "content": " Has the form\n output = x + f(x)\n \"\"\"\n def __init__(\n self,\n x_size,\n attn,\n scale,\n dtype=\"float32\",\n norm=\"layer\",\n cache_keep_len=None,\n relattn=False,\n log_scope=\"sa\",\n use_muP_factor=False,\n **kwargs,\n ):\n super().__init__(\n x_size=x_size,\n c_size=x_size,\n qk_size=x_size,\n v_size=x_size,\n attn=attn,\n scale=scale,\n relattn=relattn,\n dtype=dtype,\n **kwargs,\n )\n self.ln_x = util.get_norm(norm, x_size, dtype=dtype)\n if cache_keep_len is None:\n if hasattr(attn, \"cache_keep_len\"):\n cache_keep_len = attn.cache_keep_len\n else:\n if isinstance(attn, StridedAttn):\n stride = attn.stride\n else:\n stride = 1\n cache_keep_len = stride * attn.maxlen\n self.cache_keep_len = cache_keep_len\n self.log_scope = log_scope"
+ },
+ {
+ "comment": "This code defines a class with two methods: \"residual\" and \"forward\". The \"residual\" method applies attention to input data, using a self-attention mechanism. It also allows for updating the state based on an argument passed in. The \"forward\" method is a wrapper around the \"residual\" method which also returns the updated state.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":331-358",
+ "content": " self.use_muP_factor = use_muP_factor\n def residual(self, X_bte, state):\n X_bte = self.ln_x(X_bte)\n Q_bte = self.q_layer(X_bte)\n K_bte = self.k_layer(X_bte)\n V_bte = self.v_layer(X_bte)\n if state:\n state, K_bte, V_bte = self.update_state(state, K_bte, V_bte)\n postproc_closure, Q_bte, K_bte, V_bte = self.attn.preproc_qkv(Q_bte, K_bte, V_bte)\n extra_btT = self.relattn_logits(X_bte, K_bte.shape[1]) if self.relattn else None\n A_bte = attention(\n Q_bte,\n K_bte,\n V_bte,\n mask=self.attn.mask,\n extra_btT=extra_btT,\n maxlen=self.maxlen,\n dtype=self.dtype,\n check_sentinel=isinstance(self.attn, StridedAttn),\n use_muP_factor=self.use_muP_factor,\n )\n A_bte = postproc_closure(A_bte)\n Aproj_bte = self.proj_layer(A_bte)\n return Aproj_bte, state\n def forward(self, X_bte, state):\n R_bte, state = self.residual(X_bte, state)"
+ },
+ {
+ "comment": "The code defines three functions for a neural network:\n1. `forward` performs the forward pass of the network, taking input X_bte and state as arguments, and returns output and updated state.\n2. `stateless_forward` performs a forward pass without considering the state from the previous timestep, only taking input X_bte as an argument.\n3. `update_state` updates the network's internal state based on current and cached keys (K_bte and V_bte), returning the updated cache and full key matrix for the next timestep.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":359-381",
+ "content": " return X_bte + R_bte, state\n def stateless_forward(self, X_bte):\n out_bte, _state = self.forward(X_bte, None)\n return out_bte\n def update_state(self, state, K_bte, V_bte):\n def append(prev, new):\n \"\"\"\n Given `prev` keys from cache, and `new` keys,\n returns (cache, full), where\n - cache goes into the output state, length chosen so that on the\n next timestep, there are enough cached timesteps to get the full\n context of lenth self.maxlen.\n - full is used for the current forward pass, with length chosen so\n that the first timestep new[:, 0] gets to see a context of\n self.maxlen.\n \"\"\"\n tprev = prev.shape[1]\n startfull = max(tprev - self.cache_keep_len, 0)\n full = th.cat([prev[:, startfull:], new], dim=1)\n outstate = full[:, max(full.shape[1] - (self.cache_keep_len), 0) :]\n # To see that the preceding slicing is correct, consider the case"
+ },
+ {
+ "comment": "The code initializes a residual MLP layer with a specified size, scale, data type, normalization method, activation function, and a ratio for the multi-layer perceptron (MLP). The class PointwiseLayer inherits from nn.Module and contains an instance of the Linear layer and a normalization layer, as well as a method to apply the MLP at each timestep in the input sequence.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":382-413",
+ "content": " # that maxlen==1. Then `full` only consists of `new`, and\n # `outstate` is empty\n return outstate, full\n instate_K, instate_V = state\n outstate_K, K_bte = append(instate_K, K_bte)\n outstate_V, V_bte = append(instate_V, V_bte)\n assert outstate_K.shape[-2] <= self.cache_keep_len\n return (outstate_K, outstate_V), K_bte, V_bte\n def initial_state(self, batchsize, initial_T=0):\n return (\n tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),\n tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),\n )\n def empty_state(self):\n return None\nclass PointwiseLayer(nn.Module):\n \"\"\"\n Residual MLP applied at each timestep\n \"\"\"\n def __init__(self, x_size, scale, dtype, norm, actname=\"relu\", mlp_ratio=2):\n super().__init__()\n s = math.sqrt(scale)\n self.ln = util.get_norm(norm, x_size, dtype=dtype)\n self.mlp = mlp.MLP(\n insize=x_size,\n nhidlayer=1,"
+ },
+ {
+ "comment": "This code defines a class and a function for creating instances of a module, either for all resolutions or separate instances for each resolution based on the \"separate\" parameter. The class has an initializer that sets up the module's layers and applies scaling to their weights. The forward function performs a residual connection with the module. The _is_separate function checks if a separate instance should be created for the given name, removing it from the set if it should be created separately. The make_maybe_multiscale function creates either one instance or multiple instances of the module based on the separate parameter.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":414-449",
+ "content": " outsize=x_size,\n hidsize=int(x_size * mlp_ratio),\n hidactiv=functools.partial(act, actname),\n dtype=dtype,\n )\n self.mlp.layers[0].weight.data *= MLP0_SCALE * s\n self.mlp.layers[1].weight.data *= MLP1_SCALE * s\n def residual(self, x):\n x = self.ln(x)\n x = self.mlp(x)\n return x\n def forward(self, x):\n return x + self.residual(x)\ndef _is_separate(sep, name):\n if isinstance(sep, bool):\n return sep\n assert isinstance(sep, set)\n if name in sep:\n sep.remove(name)\n return True\n else:\n return False\ndef make_maybe_multiscale(make_fn, *args, seqlens, separate, name, **kwargs):\n \"\"\"\n This function either creates one instance of a module or creates\n a separate instance of the module for each resolution of the image,\n determined by the `separate` parameter. We create separate modules\n if `separate` is True or if `separate` is a set containing `name`.\n \"\"\"\n if _is_separate(separate, name):"
+ },
+ {
+ "comment": "This code defines a function `SplitCallJoin` that takes a list of modules and sequence lengths as inputs. It initializes the `SplitCallJoin` class, which splits the input tensor into multiple smaller tensors based on the sequence lengths, applies each module in parallel, and then concatenates the results back together. The function also defines two partial functions `MultiscaleLinear` and `MultiscalePointwise` using `functools.partial` to create variants of `make_maybe_multiscale` for `tu.NormedLinear` and `PointwiseLayer`, respectively.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/xf.py\":450-478",
+ "content": " modules = [make_fn(*args, **kwargs) for _ in seqlens]\n return SplitCallJoin(modules, seqlens)\n else:\n return make_fn(*args, **kwargs)\nclass SplitCallJoin(nn.Module):\n def __init__(self, mods, seqlens):\n super().__init__()\n self.mods = nn.ModuleList(mods)\n self.seqlens = seqlens\n def forward(self, x):\n tl = sum(self.seqlens)\n x, undo = misc.reshape_undo(x, \"..., z*tl, e\", \"..., z, tl, e\", tl=tl)\n x = list(th.split(x, self.seqlens, dim=-2))\n new_x = []\n for x, mod in misc.safezip(x, self.mods):\n x, this_undo = misc.reshape_undo(x, \"..., z, l, e\", \"..., z*l, e\")\n x = mod(x)\n x = this_undo(x)\n new_x.append(x)\n x = th.cat(new_x, dim=-2)\n x = undo(x)\n return x\nMultiscaleLinear = functools.partial(make_maybe_multiscale, tu.NormedLinear)\nMultiscalePointwise = functools.partial(make_maybe_multiscale, PointwiseLayer)"
+ }
+ ]
+}
\ No newline at end of file
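The attention function documented above computes softmax(Q·Kᵀ/√e)·V with an additive bias used for masking. The snippet below is a minimal PyTorch sketch of that core computation only, ignoring the cached-bias, sentinel, relative-attention, and muP details handled by xf.py:

```python
# Minimal sketch of masked scaled-dot-product attention (core of xf.attention).
import math
import torch as th

def simple_attention(Q_bte, K_bTe, V_bTe, causal_mask=True):
    e = Q_bte.shape[-1]
    logits_btT = th.einsum("bte,bTe->btT", Q_bte, K_bTe) / math.sqrt(e)
    if causal_mask:
        t, T = logits_btT.shape[-2:]
        # output step i may read input positions up to i + (T - t)
        mask = th.ones(t, T).tril(diagonal=T - t).bool()
        logits_btT = logits_btT.masked_fill(~mask, -1e9)
    W_btT = th.softmax(logits_btT, dim=-1)
    return th.einsum("btT,bTe->bte", W_btT, V_bTe)

x = th.randn(2, 5, 8)
assert simple_attention(x, x, x).shape == (2, 5, 8)
```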
diff --git a/docs/doc/df146e51-7408-4269-aea9-9bd8aef08073.json b/docs/doc/df146e51-7408-4269-aea9-9bd8aef08073.json
new file mode 100644
index 0000000..7033e3d
--- /dev/null
+++ b/docs/doc/df146e51-7408-4269-aea9-9bd8aef08073.json
@@ -0,0 +1,10 @@
+{
+ "summary": "MLP class defines a neural network with specified input, hidden, and output layers. It uses normed linear layers and applies the specified activation function to hidden layers.",
+ "details": [
+ {
+ "comment": "MLP class defines a neural network with specified input, hidden, and output layers. It uses normed linear layers and applies the specified activation function to hidden layers.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/mlp.py\":0-30",
+ "content": "import torch as th\nfrom torch import nn\nfrom lib import misc\nfrom lib import torch_util as tu\nclass MLP(nn.Module):\n def __init__(self, insize, nhidlayer, outsize, hidsize, hidactiv, dtype=th.float32):\n super().__init__()\n self.insize = insize\n self.nhidlayer = nhidlayer\n self.outsize = outsize\n in_sizes = [insize] + [hidsize] * nhidlayer\n out_sizes = [hidsize] * nhidlayer + [outsize]\n self.layers = nn.ModuleList(\n [tu.NormedLinear(insize, outsize, dtype=dtype) for (insize, outsize) in misc.safezip(in_sizes, out_sizes)]\n )\n self.hidactiv = hidactiv\n def forward(self, x):\n *hidlayers, finallayer = self.layers\n for layer in hidlayers:\n x = layer(x)\n x = self.hidactiv(x)\n x = finallayer(x)\n return x\n @property\n def output_shape(self):\n return (self.outsize,)"
+ }
+ ]
+}
\ No newline at end of file
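To make the documented layer plumbing concrete, here is a standalone sketch that reproduces the same in/hidden/out wiring with plain nn.Linear in place of the repository's tu.NormedLinear, so it runs without the repo's lib package; the TinyMLP name is illustrative.

```python
# Standalone sketch of the MLP wiring described above.
import torch as th
from torch import nn
import torch.nn.functional as F

class TinyMLP(nn.Module):
    def __init__(self, insize, nhidlayer, outsize, hidsize, hidactiv=F.relu):
        super().__init__()
        in_sizes = [insize] + [hidsize] * nhidlayer
        out_sizes = [hidsize] * nhidlayer + [outsize]
        self.layers = nn.ModuleList([nn.Linear(i, o) for i, o in zip(in_sizes, out_sizes)])
        self.hidactiv = hidactiv

    def forward(self, x):
        *hidden, final = self.layers
        for layer in hidden:
            x = self.hidactiv(layer(x))   # activation only on hidden layers
        return final(x)

mlp = TinyMLP(insize=16, nhidlayer=1, outsize=4, hidsize=32)
assert mlp(th.randn(8, 16)).shape == (8, 4)
```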
diff --git a/docs/doc/e31cbbf0-b729-43d3-860a-0c65c2b4ba6c.json b/docs/doc/e31cbbf0-b729-43d3-860a-0c65c2b4ba6c.json
new file mode 100644
index 0000000..b9e3825
--- /dev/null
+++ b/docs/doc/e31cbbf0-b729-43d3-860a-0c65c2b4ba6c.json
@@ -0,0 +1,35 @@
+{
+ "summary": "The ImpalaCNN architecture is created with optional group normalization, allowing for customizable input shape, downsample stacks, output hidden size, and residual blocks per stack. It inherits from a base class and utilizes 2D convolutional layers for multi-stack classification models.",
+ "details": [
+ {
+ "comment": "This code defines a CnnBasicBlock class for ImpalaCNN, which is a residual basic block that preserves the number of input channels and the shape. It uses FanInInitReLULayer for the convolutional layers and allows adjusting weight initialization scale, log scope, and initialization normalization parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":0-41",
+ "content": "import math\nfrom copy import deepcopy\nfrom typing import Dict, List, Optional\nfrom torch import nn\nfrom torch.nn import functional as F\nfrom lib import misc\nfrom lib import torch_util as tu\nfrom lib.util import FanInInitReLULayer\nclass CnnBasicBlock(nn.Module):\n \"\"\"\n Residual basic block, as in ImpalaCNN. Preserves channel number and shape\n :param inchan: number of input channels\n :param init_scale: weight init scale multiplier\n \"\"\"\n def __init__(\n self,\n inchan: int,\n init_scale: float = 1,\n log_scope=\"\",\n init_norm_kwargs: Dict = {},\n **kwargs,\n ):\n super().__init__()\n self.inchan = inchan\n s = math.sqrt(init_scale)\n self.conv0 = FanInInitReLULayer(\n self.inchan,\n self.inchan,\n kernel_size=3,\n padding=1,\n init_scale=s,\n log_scope=f\"{log_scope}/conv0\",\n **init_norm_kwargs,\n )\n self.conv1 = FanInInitReLULayer(\n self.inchan,\n self.inchan,"
+ },
+ {
+ "comment": "This code defines two classes: `ImpalaCnnConv1d` and `CnnDownStack`. The `ImpalaCnnConv1d` class represents a 1-dimensional convolutional layer with specific parameters, while the `CnnDownStack` class is a stack of downsampling blocks using the `ImpalaCnnConv1d` as the base. These classes are used for image classification tasks following the Impala CNN architecture.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":42-77",
+ "content": " kernel_size=3,\n padding=1,\n init_scale=s,\n log_scope=f\"{log_scope}/conv1\",\n **init_norm_kwargs,\n )\n def forward(self, x):\n x = x + self.conv1(self.conv0(x))\n return x\nclass CnnDownStack(nn.Module):\n \"\"\"\n Downsampling stack from Impala CNN.\n :param inchan: number of input channels\n :param nblock: number of residual blocks after downsampling\n :param outchan: number of output channels\n :param init_scale: weight init scale multiplier\n :param pool: if true, downsample with max pool\n :param post_pool_groups: if not None, normalize with group norm with this many groups\n :param kwargs: remaining kwargs are passed into the blocks and layers\n \"\"\"\n name = \"Impala_CnnDownStack\"\n def __init__(\n self,\n inchan: int,\n nblock: int,\n outchan: int,\n init_scale: float = 1,\n pool: bool = True,\n post_pool_groups: Optional[int] = None,\n log_scope: str = \"\",\n init_norm_kwargs: Dict = {},"
+ },
+ {
+ "comment": "This code initializes a CNN architecture with optional group normalization. It takes parameters such as input and output channels, pooling size, and whether to use group normalization for the first convolution layer or not. The code also includes a list of blocks, where each block is an instance of CnnBasicBlock.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":78-106",
+ "content": " first_conv_norm=False,\n **kwargs,\n ):\n super().__init__()\n self.inchan = inchan\n self.outchan = outchan\n self.pool = pool\n first_conv_init_kwargs = deepcopy(init_norm_kwargs)\n if not first_conv_norm:\n first_conv_init_kwargs[\"group_norm_groups\"] = None\n first_conv_init_kwargs[\"batch_norm\"] = False\n self.firstconv = FanInInitReLULayer(\n inchan,\n outchan,\n kernel_size=3,\n padding=1,\n log_scope=f\"{log_scope}/firstconv\",\n **first_conv_init_kwargs,\n )\n self.post_pool_groups = post_pool_groups\n if post_pool_groups is not None:\n self.n = nn.GroupNorm(post_pool_groups, outchan)\n self.blocks = nn.ModuleList(\n [\n CnnBasicBlock(\n outchan,\n init_scale=init_scale / math.sqrt(nblock),\n log_scope=f\"{log_scope}/block{i}\",\n init_norm_kwargs=init_norm_kwargs,"
+ },
+ {
+ "comment": "This code defines a class for an ImpalaCNN model, which is a residual convolutional neural network. The constructor takes input image shape, number of residual downsample stacks, output hidden size, and number of residual blocks per stack as parameters. The forward method performs the forward pass through the network, and the output_shape method returns the expected output shape given the input shape.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":107-138",
+ "content": " **kwargs,\n )\n for i in range(nblock)\n ]\n )\n def forward(self, x):\n x = self.firstconv(x)\n if self.pool:\n x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)\n if self.post_pool_groups is not None:\n x = self.n(x)\n x = tu.sequential(self.blocks, x, diag_name=self.name)\n return x\n def output_shape(self, inshape):\n c, h, w = inshape\n assert c == self.inchan\n if self.pool:\n return (self.outchan, (h + 1) // 2, (w + 1) // 2)\n else:\n return (self.outchan, h, w)\nclass ImpalaCNN(nn.Module):\n \"\"\"\n :param inshape: input image shape (height, width, channels)\n :param chans: number of residual downsample stacks. Each element is the number of\n filters per convolution in the stack\n :param outsize: output hidden size\n :param nblock: number of residual blocks per stack. Each block has 2 convs and a residual\n :param init_norm_kwargs: arguments to be passed to convolutional layers. Options can be found"
+ },
+ {
+ "comment": "This code defines a class called \"ImpalaCNN\" which inherits from the base class. It takes in parameters such as input shape, number of channels, output size, number of blocks, initialization arguments for normalization layers, and additional keyword arguments. The class initializes a list of CNN downstack modules and sets their configurations based on the input parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":139-170",
+ "content": " in ypt.model.util:FanInInitReLULayer\n :param dense_init_norm_kwargs: arguments to be passed to convolutional layers. Options can be found\n in ypt.model.util:FanInInitReLULayer\n :param kwargs: remaining kwargs are passed into the CnnDownStacks\n \"\"\"\n name = \"ImpalaCNN\"\n def __init__(\n self,\n inshape: List[int],\n chans: List[int],\n outsize: int,\n nblock: int,\n init_norm_kwargs: Dict = {},\n dense_init_norm_kwargs: Dict = {},\n first_conv_norm=False,\n **kwargs,\n ):\n super().__init__()\n h, w, c = inshape\n curshape = (c, h, w)\n self.stacks = nn.ModuleList()\n for i, outchan in enumerate(chans):\n stack = CnnDownStack(\n curshape[0],\n nblock=nblock,\n outchan=outchan,\n init_scale=math.sqrt(len(chans)),\n log_scope=f\"downstack{i}\",\n init_norm_kwargs=init_norm_kwargs,\n first_conv_norm=first_conv_norm if i == 0 else True,"
+ },
+ {
+ "comment": "This code initializes a CNN model with multiple stacked 2D convolutional layers. The output of each stack is used as input to the next stack until the final dense layer for classification.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/impala_cnn.py\":171-194",
+ "content": " **kwargs,\n )\n self.stacks.append(stack)\n curshape = stack.output_shape(curshape)\n self.dense = FanInInitReLULayer(\n misc.intprod(curshape),\n outsize,\n layer_type=\"linear\",\n log_scope=\"imapala_final_dense\",\n init_scale=1.4,\n **dense_init_norm_kwargs,\n )\n self.outsize = outsize\n def forward(self, x):\n b, t = x.shape[:-3]\n x = x.reshape(b * t, *x.shape[-3:])\n x = misc.transpose(x, \"bhwc\", \"bchw\")\n x = tu.sequential(self.stacks, x, diag_name=self.name)\n x = x.reshape(b, t, *x.shape[1:])\n x = tu.flatten_image(x)\n x = self.dense(x)\n return x"
+ }
+ ]
+}
\ No newline at end of file
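The output_shape rule documented for CnnDownStack ((h + 1) // 2 per pooled stack) is easy to sanity-check without torch. The snippet below is a small pure-Python sketch; the channel counts are illustrative, not taken from the repository's configs.

```python
# Pure-Python sketch of how CnnDownStack.output_shape propagates shapes.
def down_stack_output_shape(inshape, outchan, pool=True):
    c, h, w = inshape
    return (outchan, (h + 1) // 2, (w + 1) // 2) if pool else (outchan, h, w)

shape = (3, 128, 128)               # (channels, height, width)
for outchan in (64, 128, 128):      # illustrative per-stack channel counts
    shape = down_stack_output_shape(shape, outchan)
print(shape)                        # -> (128, 16, 16)
```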
diff --git a/docs/doc/f0b9f841-a34f-4c6a-9af1-d1ef0e9df343.json b/docs/doc/f0b9f841-a34f-4c6a-9af1-d1ef0e9df343.json
new file mode 100644
index 0000000..5b1ed12
--- /dev/null
+++ b/docs/doc/f0b9f841-a34f-4c6a-9af1-d1ef0e9df343.json
@@ -0,0 +1,35 @@
+{
+ "summary": "The code handles libraries, device defaults, tensor and layer functions, CUDA availability, LayerNorm creation, dimension flattening, sequential application of layers, parameter loading from multiple paths, and function state saving. The function takes a dtype string and converts it to PyTorch tensor data type, with an index function for batched broadcasting 'xi' along specified 'gather_dim'.",
+ "details": [
+ {
+ "comment": "This code imports various libraries, defines a function to convert context managers into decorators, checks if CUDA is available, sets the default device as either CUDA or CPU depending on availability, and then defines functions for creating tensors with zeros.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":0-56",
+ "content": "import functools\nimport itertools\nimport math\nimport os\nimport pickle\nimport re\nimport subprocess\nimport tempfile\nfrom contextlib import contextmanager\nfrom hashlib import md5, sha1\nimport numpy as np\nimport torch as th\nimport torch.distributed as dist\nimport torch.distributions as dis\nimport torch.nn.functional as F\nfrom torch import nn\nimport lib.tree_util as tree_util\nfrom lib import misc\ndef contextmanager_to_decorator(cm):\n def decorator(fn):\n @functools.wraps(fn)\n def newfn(*args, **kwargs):\n with cm():\n return fn(*args, **kwargs)\n return newfn\n return decorator\ndef have_cuda():\n return th.has_cuda\ndef default_device_type():\n return \"cuda\" if have_cuda() else \"cpu\"\nno_grad = contextmanager_to_decorator(th.no_grad)\nDEFAULT_DEVICE = th.device(type=default_device_type())\ndef set_default_torch_device(device):\n global DEFAULT_DEVICE\n DEFAULT_DEVICE = th.device(device)\ndef dev():\n return DEFAULT_DEVICE\ndef zeros(*args, **kwargs):\n return th.zeros(*args, **kwargs, device=dev())"
+ },
+ {
+ "comment": "Code defines functions for creating normalized Linear layers, F16 linear and LayerNorm modules. It also includes a utility function to create Tensor objects on the device specified by dev() function.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":59-95",
+ "content": "def ones(*args, **kwargs):\n return th.ones(*args, **kwargs, device=dev())\ndef arange(*args, **kwargs):\n return th.arange(*args, **kwargs, device=dev())\ndef NormedLinear(*args, scale=1.0, dtype=th.float32, **kwargs):\n \"\"\"\n nn.Linear but with normalized fan-in init\n \"\"\"\n dtype = parse_dtype(dtype)\n if dtype == th.float32:\n out = nn.Linear(*args, **kwargs)\n elif dtype == th.float16:\n out = LinearF16(*args, **kwargs)\n else:\n raise ValueError(dtype)\n out.weight.data *= scale / out.weight.norm(dim=1, p=2, keepdim=True)\n if kwargs.get(\"bias\", True):\n out.bias.data *= 0\n return out\nclass LinearF16(nn.Linear):\n def forward(self, x):\n return F.linear(x, self.weight.half(), self.bias.half() if self.bias is not None else None)\nclass LayerNormF16(nn.LayerNorm):\n def forward(self, x):\n return F.layer_norm(x, self.normalized_shape, self.weight.half(), self.bias.half(), self.eps)\ndef LayerNorm(*args, dtype=th.float32, **kwargs):\n dtype = parse_dtype(dtype)"
+ },
+ {
+ "comment": "- Code snippets from \"Video-Pre-Training/lib/torch_util.py\":\n- 96-130: LayerNorm creation depending on dtype (float32, float16), sets weight no_scale to True.\n- flatten_image: Flattens the last three dimensions of a tensor.\n- sequential: Applies layers in order to input tensor, returns final result.\n- load_average_with_metadata: Loads models from multiple paths and averages their parameters.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":96-130",
+ "content": " if dtype == th.float32:\n out = nn.LayerNorm(*args, **kwargs)\n elif dtype == th.float16:\n out = LayerNormF16(*args, **kwargs)\n else:\n raise ValueError(dtype)\n out.weight.no_scale = True\n return out\ndef flatten_image(x):\n \"\"\"\n Flattens last three dims\n \"\"\"\n *batch_shape, h, w, c = x.shape\n return x.reshape((*batch_shape, h * w * c))\ndef sequential(layers, x, *args, diag_name=None, use_checkpoint=False):\n for (i, layer) in enumerate(layers):\n x = layer(x, *args)\n return x\n@no_grad\ndef load_average_with_metadata(paths, overrides):\n n_models = len(paths)\n model, metadata = load_with_metadata(paths[0], overrides=overrides)\n for p in model.parameters():\n p.mul_(1 / n_models)\n for p in paths[1:]:\n new_model, _ = load_with_metadata(p, overrides=overrides)\n for (n1, p1), (n2, p2) in misc.safezip(model.named_parameters(), new_model.named_parameters()):\n assert n1 == n2, f\"names {n1} and {n2} don't match\"\n p1.add_(p2.mul_(1 / n_models))"
+ },
+ {
+ "comment": "The code defines a decorator, save_kwargs, that allows saving the function and its arguments used to create a PyTorch module, enabling later restoration of the model state. It also includes a utility function, parse_dtype, for converting data types into their equivalent PyTorch dtype objects.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":131-164",
+ "content": " return model, metadata\ndef save_kwargs(fn):\n \"\"\"\n This decorator passes through the user-provided kwargs and adds one more, called\n save_kwargs, mapping to {\"create_fn\" : name_of_decorated_fn, \"kwargs\" : other_kwargs}\n You put on this decorator on a function that creates a pytorch module. This will\n save the kwargs and the function that was used to create the module.\n This lets us restore the model state later.\n \"\"\"\n @functools.wraps(fn)\n def wrapper(**kwargs):\n if \"save_kwargs\" in kwargs:\n return fn(**kwargs)\n else:\n sk = {**kwargs, \"create_fn\": f\"{fn.__module__}:{fn.__name__}\"}\n return fn(save_kwargs=sk, **kwargs)\n return wrapper\ndef parse_dtype(x):\n if isinstance(x, th.dtype):\n return x\n elif isinstance(x, str):\n if x == \"float32\" or x == \"float\":\n return th.float32\n elif x == \"float64\" or x == \"double\":\n return th.float64\n elif x == \"float16\" or x == \"half\":\n return th.float16"
+ },
+ {
+ "comment": "This function parses a dtype string and returns the corresponding PyTorch tensor data type. It also provides an index function for batched, broadcasting index of x along dimension i.ndim. The index function ensures that the input shape is compatible with the tensor shape and expands or gathers the tensor accordingly.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":165-198",
+ "content": " elif x == \"uint8\":\n return th.uint8\n elif x == \"int8\":\n return th.int8\n elif x == \"int16\" or x == \"short\":\n return th.int16\n elif x == \"int32\" or x == \"int\":\n return th.int32\n elif x == \"int64\" or x == \"long\":\n return th.int64\n elif x == \"bool\":\n return th.bool\n else:\n raise ValueError(f\"cannot parse {x} as a dtype\")\n else:\n raise TypeError(f\"cannot parse {type(x)} as dtype\")\ndef index(x, i):\n \"\"\"\n Batched, broadcasting index of x along dimension i.ndim.\n For example, if x has shape (1, 2, 3, 4, 5) and i has shape (1, 1, 3)\n then the result has shape (1, 2, 3, 5) and each value in i must be between 0 and 3.\n \"\"\"\n assert x.ndim >= i.ndim + 1\n gather_dim = i.ndim\n while i.ndim < x.ndim:\n i = i.unsqueeze(-1)\n expand_shape = list(x.shape)\n expand_shape[gather_dim] = 1\n i = i.expand(*expand_shape)\n xi = th.gather(x, gather_dim, i)\n assert xi.shape[gather_dim] == 1"
+ },
+ {
+ "comment": "This function is squeezing the dimensions of the tensor 'xi' based on the value in 'gather_dim'.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/lib/torch_util.py\":199-199",
+ "content": " return xi.squeeze(gather_dim)"
+ }
+ ]
+}
\ No newline at end of file
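The batched broadcasting index helper is the least obvious utility in this file, so here is a standalone sketch mirroring the documented behavior (x of shape (1, 2, 3, 4, 5) indexed along dimension i.ndim yields shape (1, 2, 3, 5)); it re-implements the logic rather than importing the repo's lib.torch_util.

```python
# Standalone sketch of torch_util.index: batched, broadcasting gather along dim i.ndim.
import torch as th

def batched_index(x, i):
    gather_dim = i.ndim
    while i.ndim < x.ndim:            # append singleton dims so i broadcasts over x
        i = i.unsqueeze(-1)
    expand_shape = list(x.shape)
    expand_shape[gather_dim] = 1      # keep a single slot along the gathered dim
    i = i.expand(*expand_shape)
    return th.gather(x, gather_dim, i).squeeze(gather_dim)

x = th.arange(1 * 2 * 3 * 4 * 5).reshape(1, 2, 3, 4, 5)
i = th.randint(0, 4, (1, 1, 3))      # values index the size-4 dimension
assert batched_index(x, i).shape == (1, 2, 3, 5)
```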
diff --git a/docs/doc/f800cd5e-0783-4f3e-bdde-61feee92aa21.json b/docs/doc/f800cd5e-0783-4f3e-bdde-61feee92aa21.json
new file mode 100644
index 0000000..62f6285
--- /dev/null
+++ b/docs/doc/f800cd5e-0783-4f3e-bdde-61feee92aa21.json
@@ -0,0 +1,15 @@
+{
+ "summary": "The code imports libraries, defines a function 'main' that loads and uses a pre-trained model in the MineRL environment, taking two arguments: the path to the model file and weights file. It also adds an optional argument \"--model\" of type string for the file path loading.",
+ "details": [
+ {
+ "comment": "The code imports necessary libraries and defines a function named \"main\" which loads a pre-trained model, creates an agent, and then launches the MineRL environment. The main function takes two arguments: 'model', the path to the pickle file containing the loaded model's parameters; and 'weights', the path to the '.weights' file to be loaded. It then continuously takes actions in the environment based on the pre-trained agent's recommendations until the MineRL environment is completed or terminated.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_agent.py\":0-29",
+ "content": "from argparse import ArgumentParser\nimport pickle\nfrom minerl.herobraine.env_specs.human_survival_specs import HumanSurvival\nfrom agent import MineRLAgent, ENV_KWARGS\ndef main(model, weights):\n env = HumanSurvival(**ENV_KWARGS).make()\n print(\"---Loading model---\")\n agent_parameters = pickle.load(open(model, \"rb\"))\n policy_kwargs = agent_parameters[\"model\"][\"args\"][\"net\"][\"args\"]\n pi_head_kwargs = agent_parameters[\"model\"][\"args\"][\"pi_head_opts\"]\n pi_head_kwargs[\"temperature\"] = float(pi_head_kwargs[\"temperature\"])\n agent = MineRLAgent(env, policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs)\n agent.load_weights(weights)\n print(\"---Launching MineRL enviroment (be patient)---\")\n obs = env.reset()\n while True:\n minerl_action = agent.get_action(obs)\n obs, reward, done, info = env.step(minerl_action)\n env.render()\nif __name__ == \"__main__\":\n parser = ArgumentParser(\"Run pretrained models on MineRL environment\")\n parser.add_argument(\"--weights\", type=str, required=True, help=\"Path to the '.weights' file to be loaded.\")"
+ },
+ {
+ "comment": "This code is adding a required argument \"--model\" to the parser, specifying its type as string and loading the file path from this argument.",
+ "location": "\"/media/root/Toshiba XG3/works/Video-Pre-Training/docs/src/run_agent.py\":30-34",
+ "content": " parser.add_argument(\"--model\", type=str, required=True, help=\"Path to the '.model' file to be loaded.\")\n args = parser.parse_args()\n main(args.model, args.weights)"
+ }
+ ]
+}
\ No newline at end of file
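For reference, the way run_agent.py unpacks the pickled '.model' file (as shown in the snippet above) can be reproduced in isolation; the path below is an illustrative placeholder, not a file included in this diff.

```python
# Sketch of the kwargs extraction performed by run_agent.py; the path is illustrative.
import pickle

with open("path/to/agent.model", "rb") as f:
    agent_parameters = pickle.load(f)

policy_kwargs = agent_parameters["model"]["args"]["net"]["args"]
pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
pi_head_kwargs["temperature"] = float(pi_head_kwargs["temperature"])
```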
diff --git a/docs/github-markdown.css b/docs/github-markdown.css
new file mode 100644
index 0000000..96a4f29
--- /dev/null
+++ b/docs/github-markdown.css
@@ -0,0 +1,1197 @@
+@media (prefers-color-scheme: dark) {
+
+ .markdown-body,
+ [data-theme="dark"] {
+ /*dark*/
+ color-scheme: dark;
+ --color-prettylights-syntax-comment: #8b949e;
+ --color-prettylights-syntax-constant: #79c0ff;
+ --color-prettylights-syntax-entity: #d2a8ff;
+ --color-prettylights-syntax-storage-modifier-import: #c9d1d9;
+ --color-prettylights-syntax-entity-tag: #7ee787;
+ --color-prettylights-syntax-keyword: #ff7b72;
+ --color-prettylights-syntax-string: #a5d6ff;
+ --color-prettylights-syntax-variable: #ffa657;
+ --color-prettylights-syntax-brackethighlighter-unmatched: #f85149;
+ --color-prettylights-syntax-invalid-illegal-text: #f0f6fc;
+ --color-prettylights-syntax-invalid-illegal-bg: #8e1519;
+ --color-prettylights-syntax-carriage-return-text: #f0f6fc;
+ --color-prettylights-syntax-carriage-return-bg: #b62324;
+ --color-prettylights-syntax-string-regexp: #7ee787;
+ --color-prettylights-syntax-markup-list: #f2cc60;
+ --color-prettylights-syntax-markup-heading: #1f6feb;
+ --color-prettylights-syntax-markup-italic: #c9d1d9;
+ --color-prettylights-syntax-markup-bold: #c9d1d9;
+ --color-prettylights-syntax-markup-deleted-text: #ffdcd7;
+ --color-prettylights-syntax-markup-deleted-bg: #67060c;
+ --color-prettylights-syntax-markup-inserted-text: #aff5b4;
+ --color-prettylights-syntax-markup-inserted-bg: #033a16;
+ --color-prettylights-syntax-markup-changed-text: #ffdfb6;
+ --color-prettylights-syntax-markup-changed-bg: #5a1e02;
+ --color-prettylights-syntax-markup-ignored-text: #c9d1d9;
+ --color-prettylights-syntax-markup-ignored-bg: #1158c7;
+ --color-prettylights-syntax-meta-diff-range: #d2a8ff;
+ --color-prettylights-syntax-brackethighlighter-angle: #8b949e;
+ --color-prettylights-syntax-sublimelinter-gutter-mark: #484f58;
+ --color-prettylights-syntax-constant-other-reference-link: #a5d6ff;
+ --color-fg-default: #e6edf3;
+ --color-fg-muted: #848d97;
+ --color-fg-subtle: #6e7681;
+ --color-canvas-default: #0d1117;
+ --color-canvas-subtle: #161b22;
+ --color-border-default: #30363d;
+ --color-border-muted: #21262d;
+ --color-neutral-muted: rgba(110, 118, 129, 0.4);
+ --color-accent-fg: #2f81f7;
+ --color-accent-emphasis: #1f6feb;
+ --color-success-fg: #3fb950;
+ --color-success-emphasis: #238636;
+ --color-attention-fg: #d29922;
+ --color-attention-emphasis: #9e6a03;
+ --color-attention-subtle: rgba(187, 128, 9, 0.15);
+ --color-danger-fg: #f85149;
+ --color-danger-emphasis: #da3633;
+ --color-done-fg: #a371f7;
+ --color-done-emphasis: #8957e5;
+ }
+}
+
+@media (prefers-color-scheme: light) {
+
+ .markdown-body,
+ [data-theme="light"] {
+ /*light*/
+ color-scheme: light;
+ --color-prettylights-syntax-comment: #57606a;
+ --color-prettylights-syntax-constant: #0550ae;
+ --color-prettylights-syntax-entity: #6639ba;
+ --color-prettylights-syntax-storage-modifier-import: #24292f;
+ --color-prettylights-syntax-entity-tag: #116329;
+ --color-prettylights-syntax-keyword: #cf222e;
+ --color-prettylights-syntax-string: #0a3069;
+ --color-prettylights-syntax-variable: #953800;
+ --color-prettylights-syntax-brackethighlighter-unmatched: #82071e;
+ --color-prettylights-syntax-invalid-illegal-text: #f6f8fa;
+ --color-prettylights-syntax-invalid-illegal-bg: #82071e;
+ --color-prettylights-syntax-carriage-return-text: #f6f8fa;
+ --color-prettylights-syntax-carriage-return-bg: #cf222e;
+ --color-prettylights-syntax-string-regexp: #116329;
+ --color-prettylights-syntax-markup-list: #3b2300;
+ --color-prettylights-syntax-markup-heading: #0550ae;
+ --color-prettylights-syntax-markup-italic: #24292f;
+ --color-prettylights-syntax-markup-bold: #24292f;
+ --color-prettylights-syntax-markup-deleted-text: #82071e;
+ --color-prettylights-syntax-markup-deleted-bg: #ffebe9;
+ --color-prettylights-syntax-markup-inserted-text: #116329;
+ --color-prettylights-syntax-markup-inserted-bg: #dafbe1;
+ --color-prettylights-syntax-markup-changed-text: #953800;
+ --color-prettylights-syntax-markup-changed-bg: #ffd8b5;
+ --color-prettylights-syntax-markup-ignored-text: #eaeef2;
+ --color-prettylights-syntax-markup-ignored-bg: #0550ae;
+ --color-prettylights-syntax-meta-diff-range: #8250df;
+ --color-prettylights-syntax-brackethighlighter-angle: #57606a;
+ --color-prettylights-syntax-sublimelinter-gutter-mark: #8c959f;
+ --color-prettylights-syntax-constant-other-reference-link: #0a3069;
+ --color-fg-default: #1F2328;
+ --color-fg-muted: #656d76;
+ --color-fg-subtle: #6e7781;
+ --color-canvas-default: #ffffff;
+ --color-canvas-subtle: #f6f8fa;
+ --color-border-default: #d0d7de;
+ --color-border-muted: hsla(210, 18%, 87%, 1);
+ --color-neutral-muted: rgba(175, 184, 193, 0.2);
+ --color-accent-fg: #0969da;
+ --color-accent-emphasis: #0969da;
+ --color-success-fg: #1a7f37;
+ --color-success-emphasis: #1f883d;
+ --color-attention-fg: #9a6700;
+ --color-attention-emphasis: #9a6700;
+ --color-attention-subtle: #fff8c5;
+ --color-danger-fg: #d1242f;
+ --color-danger-emphasis: #cf222e;
+ --color-done-fg: #8250df;
+ --color-done-emphasis: #8250df;
+ }
+}
+
+.markdown-body {
+ -ms-text-size-adjust: 100%;
+ -webkit-text-size-adjust: 100%;
+ margin: 0;
+ color: var(--color-fg-default);
+ background-color: var(--color-canvas-default);
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Noto Sans", Helvetica, Arial, sans-serif, "Apple Color Emoji", "Segoe UI Emoji";
+ font-size: 16px;
+ line-height: 1.5;
+ word-wrap: break-word;
+}
+
+.markdown-body .octicon {
+ display: inline-block;
+ fill: currentColor;
+ vertical-align: text-bottom;
+}
+
+.markdown-body h1:hover .anchor .octicon-link:before,
+.markdown-body h2:hover .anchor .octicon-link:before,
+.markdown-body h3:hover .anchor .octicon-link:before,
+.markdown-body h4:hover .anchor .octicon-link:before,
+.markdown-body h5:hover .anchor .octicon-link:before,
+.markdown-body h6:hover .anchor .octicon-link:before {
+ width: 16px;
+ height: 16px;
+ content: ' ';
+ display: inline-block;
+ background-color: currentColor;
+ -webkit-mask-image: url("data:image/svg+xml, ");
+ mask-image: url("data:image/svg+xml, ");
+}
+
+.markdown-body details,
+.markdown-body figcaption,
+.markdown-body figure {
+ display: block;
+}
+
+.markdown-body summary {
+ display: list-item;
+}
+
+.markdown-body [hidden] {
+ display: none !important;
+}
+
+.markdown-body a {
+ background-color: transparent;
+ color: var(--color-accent-fg);
+ text-decoration: none;
+}
+
+.markdown-body abbr[title] {
+ border-bottom: none;
+ -webkit-text-decoration: underline dotted;
+ text-decoration: underline dotted;
+}
+
+.markdown-body b,
+.markdown-body strong {
+ font-weight: var(--base-text-weight-semibold, 600);
+}
+
+.markdown-body dfn {
+ font-style: italic;
+}
+
+.markdown-body h1 {
+ margin: .67em 0;
+ font-weight: var(--base-text-weight-semibold, 600);
+ padding-bottom: .3em;
+ font-size: 2em;
+ border-bottom: 1px solid var(--color-border-muted);
+}
+
+.markdown-body mark {
+ background-color: var(--color-attention-subtle);
+ color: var(--color-fg-default);
+}
+
+.markdown-body small {
+ font-size: 90%;
+}
+
+.markdown-body sub,
+.markdown-body sup {
+ font-size: 75%;
+ line-height: 0;
+ position: relative;
+ vertical-align: baseline;
+}
+
+.markdown-body sub {
+ bottom: -0.25em;
+}
+
+.markdown-body sup {
+ top: -0.5em;
+}
+
+.markdown-body img {
+ border-style: none;
+ max-width: 100%;
+ box-sizing: content-box;
+ background-color: var(--color-canvas-default);
+}
+
+.markdown-body code,
+.markdown-body kbd,
+.markdown-body pre,
+.markdown-body samp {
+ font-family: monospace;
+ font-size: 1em;
+}
+
+.markdown-body figure {
+ margin: 1em 40px;
+}
+
+.markdown-body hr {
+ box-sizing: content-box;
+ overflow: hidden;
+ background: transparent;
+ border-bottom: 1px solid var(--color-border-muted);
+ height: .25em;
+ padding: 0;
+ margin: 24px 0;
+ background-color: var(--color-border-default);
+ border: 0;
+}
+
+.markdown-body input {
+ font: inherit;
+ margin: 0;
+ overflow: visible;
+ font-family: inherit;
+ font-size: inherit;
+ line-height: inherit;
+}
+
+.markdown-body [type=button],
+.markdown-body [type=reset],
+.markdown-body [type=submit] {
+ -webkit-appearance: button;
+ appearance: button;
+}
+
+.markdown-body [type=checkbox],
+.markdown-body [type=radio] {
+ box-sizing: border-box;
+ padding: 0;
+}
+
+.markdown-body [type=number]::-webkit-inner-spin-button,
+.markdown-body [type=number]::-webkit-outer-spin-button {
+ height: auto;
+}
+
+.markdown-body [type=search]::-webkit-search-cancel-button,
+.markdown-body [type=search]::-webkit-search-decoration {
+ -webkit-appearance: none;
+ appearance: none;
+}
+
+.markdown-body ::-webkit-input-placeholder {
+ color: inherit;
+ opacity: .54;
+}
+
+.markdown-body ::-webkit-file-upload-button {
+ -webkit-appearance: button;
+ appearance: button;
+ font: inherit;
+}
+
+.markdown-body a:hover {
+ text-decoration: underline;
+}
+
+.markdown-body ::placeholder {
+ color: var(--color-fg-subtle);
+ opacity: 1;
+}
+
+.markdown-body hr::before {
+ display: table;
+ content: "";
+}
+
+.markdown-body hr::after {
+ display: table;
+ clear: both;
+ content: "";
+}
+
+.markdown-body table {
+ border-spacing: 0;
+ border-collapse: collapse;
+ display: block;
+ width: max-content;
+ max-width: 100%;
+ overflow: auto;
+}
+
+.markdown-body td,
+.markdown-body th {
+ padding: 0;
+}
+
+.markdown-body details summary {
+ cursor: pointer;
+}
+
+.markdown-body details:not([open])>*:not(summary) {
+ display: none !important;
+}
+
+.markdown-body a:focus,
+.markdown-body [role=button]:focus,
+.markdown-body input[type=radio]:focus,
+.markdown-body input[type=checkbox]:focus {
+ outline: 2px solid var(--color-accent-fg);
+ outline-offset: -2px;
+ box-shadow: none;
+}
+
+.markdown-body a:focus:not(:focus-visible),
+.markdown-body [role=button]:focus:not(:focus-visible),
+.markdown-body input[type=radio]:focus:not(:focus-visible),
+.markdown-body input[type=checkbox]:focus:not(:focus-visible) {
+ outline: solid 1px transparent;
+}
+
+.markdown-body a:focus-visible,
+.markdown-body [role=button]:focus-visible,
+.markdown-body input[type=radio]:focus-visible,
+.markdown-body input[type=checkbox]:focus-visible {
+ outline: 2px solid var(--color-accent-fg);
+ outline-offset: -2px;
+ box-shadow: none;
+}
+
+.markdown-body a:not([class]):focus,
+.markdown-body a:not([class]):focus-visible,
+.markdown-body input[type=radio]:focus,
+.markdown-body input[type=radio]:focus-visible,
+.markdown-body input[type=checkbox]:focus,
+.markdown-body input[type=checkbox]:focus-visible {
+ outline-offset: 0;
+}
+
+.markdown-body kbd {
+ display: inline-block;
+ padding: 3px 5px;
+ font: 11px ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace;
+ line-height: 10px;
+ color: var(--color-fg-default);
+ vertical-align: middle;
+ background-color: var(--color-canvas-subtle);
+ border: solid 1px var(--color-neutral-muted);
+ border-bottom-color: var(--color-neutral-muted);
+ border-radius: 6px;
+ box-shadow: inset 0 -1px 0 var(--color-neutral-muted);
+}
+
+.markdown-body h1,
+.markdown-body h2,
+.markdown-body h3,
+.markdown-body h4,
+.markdown-body h5,
+.markdown-body h6 {
+ margin-top: 24px;
+ margin-bottom: 16px;
+ font-weight: var(--base-text-weight-semibold, 600);
+ line-height: 1.25;
+}
+
+.markdown-body h2 {
+ font-weight: var(--base-text-weight-semibold, 600);
+ padding-bottom: .3em;
+ font-size: 1.5em;
+ border-bottom: 1px solid var(--color-border-muted);
+}
+
+.markdown-body h3 {
+ font-weight: var(--base-text-weight-semibold, 600);
+ font-size: 1.25em;
+}
+
+.markdown-body h4 {
+ font-weight: var(--base-text-weight-semibold, 600);
+ font-size: 1em;
+}
+
+.markdown-body h5 {
+ font-weight: var(--base-text-weight-semibold, 600);
+ font-size: .875em;
+}
+
+.markdown-body h6 {
+ font-weight: var(--base-text-weight-semibold, 600);
+ font-size: .85em;
+ color: var(--color-fg-muted);
+}
+
+.markdown-body p {
+ margin-top: 0;
+ margin-bottom: 10px;
+}
+
+.markdown-body blockquote {
+ margin: 0;
+ padding: 0 1em;
+ color: var(--color-fg-muted);
+ border-left: .25em solid var(--color-border-default);
+}
+
+.markdown-body ul,
+.markdown-body ol {
+ margin-top: 0;
+ margin-bottom: 0;
+ padding-left: 2em;
+}
+
+.markdown-body ol ol,
+.markdown-body ul ol {
+ list-style-type: lower-roman;
+}
+
+.markdown-body ul ul ol,
+.markdown-body ul ol ol,
+.markdown-body ol ul ol,
+.markdown-body ol ol ol {
+ list-style-type: lower-alpha;
+}
+
+.markdown-body dd {
+ margin-left: 0;
+}
+
+.markdown-body tt,
+.markdown-body code,
+.markdown-body samp {
+ font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace;
+ font-size: 12px;
+}
+
+.markdown-body pre {
+ margin-top: 0;
+ margin-bottom: 0;
+ font-family: ui-monospace, SFMono-Regular, SF Mono, Menlo, Consolas, Liberation Mono, monospace;
+ font-size: 12px;
+ word-wrap: normal;
+}
+
+.markdown-body .octicon {
+ display: inline-block;
+ overflow: visible !important;
+ vertical-align: text-bottom;
+ fill: currentColor;
+}
+
+.markdown-body input::-webkit-outer-spin-button,
+.markdown-body input::-webkit-inner-spin-button {
+ margin: 0;
+ -webkit-appearance: none;
+ appearance: none;
+}
+
+.markdown-body .mr-2 {
+ margin-right: var(--base-size-8, 8px) !important;
+}
+
+.markdown-body::before {
+ display: table;
+ content: "";
+}
+
+.markdown-body::after {
+ display: table;
+ clear: both;
+ content: "";
+}
+
+.markdown-body>*:first-child {
+ margin-top: 0 !important;
+}
+
+.markdown-body>*:last-child {
+ margin-bottom: 0 !important;
+}
+
+.markdown-body a:not([href]) {
+ color: inherit;
+ text-decoration: none;
+}
+
+.markdown-body .absent {
+ color: var(--color-danger-fg);
+}
+
+.markdown-body .anchor {
+ float: left;
+ padding-right: 4px;
+ margin-left: -20px;
+ line-height: 1;
+}
+
+.markdown-body .anchor:focus {
+ outline: none;
+}
+
+.markdown-body p,
+.markdown-body blockquote,
+.markdown-body ul,
+.markdown-body ol,
+.markdown-body dl,
+.markdown-body table,
+.markdown-body pre,
+.markdown-body details {
+ margin-top: 0;
+ margin-bottom: 16px;
+}
+
+.markdown-body blockquote>:first-child {
+ margin-top: 0;
+}
+
+.markdown-body blockquote>:last-child {
+ margin-bottom: 0;
+}
+
+.markdown-body h1 .octicon-link,
+.markdown-body h2 .octicon-link,
+.markdown-body h3 .octicon-link,
+.markdown-body h4 .octicon-link,
+.markdown-body h5 .octicon-link,
+.markdown-body h6 .octicon-link {
+ color: var(--color-fg-default);
+ vertical-align: middle;
+ visibility: hidden;
+}
+
+.markdown-body h1:hover .anchor,
+.markdown-body h2:hover .anchor,
+.markdown-body h3:hover .anchor,
+.markdown-body h4:hover .anchor,
+.markdown-body h5:hover .anchor,
+.markdown-body h6:hover .anchor {
+ text-decoration: none;
+}
+
+.markdown-body h1:hover .anchor .octicon-link,
+.markdown-body h2:hover .anchor .octicon-link,
+.markdown-body h3:hover .anchor .octicon-link,
+.markdown-body h4:hover .anchor .octicon-link,
+.markdown-body h5:hover .anchor .octicon-link,
+.markdown-body h6:hover .anchor .octicon-link {
+ visibility: visible;
+}
+
+.markdown-body h1 tt,
+.markdown-body h1 code,
+.markdown-body h2 tt,
+.markdown-body h2 code,
+.markdown-body h3 tt,
+.markdown-body h3 code,
+.markdown-body h4 tt,
+.markdown-body h4 code,
+.markdown-body h5 tt,
+.markdown-body h5 code,
+.markdown-body h6 tt,
+.markdown-body h6 code {
+ padding: 0 .2em;
+ font-size: inherit;
+}
+
+.markdown-body summary h1,
+.markdown-body summary h2,
+.markdown-body summary h3,
+.markdown-body summary h4,
+.markdown-body summary h5,
+.markdown-body summary h6 {
+ display: inline-block;
+}
+
+.markdown-body summary h1 .anchor,
+.markdown-body summary h2 .anchor,
+.markdown-body summary h3 .anchor,
+.markdown-body summary h4 .anchor,
+.markdown-body summary h5 .anchor,
+.markdown-body summary h6 .anchor {
+ margin-left: -40px;
+}
+
+.markdown-body summary h1,
+.markdown-body summary h2 {
+ padding-bottom: 0;
+ border-bottom: 0;
+}
+
+.markdown-body ul.no-list,
+.markdown-body ol.no-list {
+ padding: 0;
+ list-style-type: none;
+}
+
+.markdown-body ol[type="a s"] {
+ list-style-type: lower-alpha;
+}
+
+.markdown-body ol[type="A s"] {
+ list-style-type: upper-alpha;
+}
+
+.markdown-body ol[type="i s"] {
+ list-style-type: lower-roman;
+}
+
+.markdown-body ol[type="I s"] {
+ list-style-type: upper-roman;
+}
+
+.markdown-body ol[type="1"] {
+ list-style-type: decimal;
+}
+
+.markdown-body div>ol:not([type]) {
+ list-style-type: decimal;
+}
+
+.markdown-body ul ul,
+.markdown-body ul ol,
+.markdown-body ol ol,
+.markdown-body ol ul {
+ margin-top: 0;
+ margin-bottom: 0;
+}
+
+.markdown-body li>p {
+ margin-top: 16px;
+}
+
+.markdown-body li+li {
+ margin-top: .25em;
+}
+
+.markdown-body dl {
+ padding: 0;
+}
+
+.markdown-body dl dt {
+ padding: 0;
+ margin-top: 16px;
+ font-size: 1em;
+ font-style: italic;
+ font-weight: var(--base-text-weight-semibold, 600);
+}
+
+.markdown-body dl dd {
+ padding: 0 16px;
+ margin-bottom: 16px;
+}
+
+.markdown-body table th {
+ font-weight: var(--base-text-weight-semibold, 600);
+}
+
+.markdown-body table th,
+.markdown-body table td {
+ padding: 6px 13px;
+ border: 1px solid var(--color-border-default);
+}
+
+.markdown-body table td>:last-child {
+ margin-bottom: 0;
+}
+
+.markdown-body table tr {
+ background-color: var(--color-canvas-default);
+ border-top: 1px solid var(--color-border-muted);
+}
+
+.markdown-body table tr:nth-child(2n) {
+ background-color: var(--color-canvas-subtle);
+}
+
+.markdown-body table img {
+ background-color: transparent;
+}
+
+.markdown-body img[align=right] {
+ padding-left: 20px;
+}
+
+.markdown-body img[align=left] {
+ padding-right: 20px;
+}
+
+.markdown-body .emoji {
+ max-width: none;
+ vertical-align: text-top;
+ background-color: transparent;
+}
+
+.markdown-body span.frame {
+ display: block;
+ overflow: hidden;
+}
+
+.markdown-body span.frame>span {
+ display: block;
+ float: left;
+ width: auto;
+ padding: 7px;
+ margin: 13px 0 0;
+ overflow: hidden;
+ border: 1px solid var(--color-border-default);
+}
+
+.markdown-body span.frame span img {
+ display: block;
+ float: left;
+}
+
+.markdown-body span.frame span span {
+ display: block;
+ padding: 5px 0 0;
+ clear: both;
+ color: var(--color-fg-default);
+}
+
+.markdown-body span.align-center {
+ display: block;
+ overflow: hidden;
+ clear: both;
+}
+
+.markdown-body span.align-center>span {
+ display: block;
+ margin: 13px auto 0;
+ overflow: hidden;
+ text-align: center;
+}
+
+.markdown-body span.align-center span img {
+ margin: 0 auto;
+ text-align: center;
+}
+
+.markdown-body span.align-right {
+ display: block;
+ overflow: hidden;
+ clear: both;
+}
+
+.markdown-body span.align-right>span {
+ display: block;
+ margin: 13px 0 0;
+ overflow: hidden;
+ text-align: right;
+}
+
+.markdown-body span.align-right span img {
+ margin: 0;
+ text-align: right;
+}
+
+.markdown-body span.float-left {
+ display: block;
+ float: left;
+ margin-right: 13px;
+ overflow: hidden;
+}
+
+.markdown-body span.float-left span {
+ margin: 13px 0 0;
+}
+
+.markdown-body span.float-right {
+ display: block;
+ float: right;
+ margin-left: 13px;
+ overflow: hidden;
+}
+
+.markdown-body span.float-right>span {
+ display: block;
+ margin: 13px auto 0;
+ overflow: hidden;
+ text-align: right;
+}
+
+.markdown-body code,
+.markdown-body tt {
+ padding: .2em .4em;
+ margin: 0;
+ font-size: 85%;
+ white-space: break-spaces;
+ background-color: var(--color-neutral-muted);
+ border-radius: 6px;
+}
+
+.markdown-body code br,
+.markdown-body tt br {
+ display: none;
+}
+
+.markdown-body del code {
+ text-decoration: inherit;
+}
+
+.markdown-body samp {
+ font-size: 85%;
+}
+
+.markdown-body pre code {
+ font-size: 100%;
+}
+
+.markdown-body pre>code {
+ padding: 0;
+ margin: 0;
+ word-break: normal;
+ white-space: pre;
+ background: transparent;
+ border: 0;
+}
+
+.markdown-body .highlight {
+ margin-bottom: 16px;
+}
+
+.markdown-body .highlight pre {
+ margin-bottom: 0;
+ word-break: normal;
+}
+
+.markdown-body .highlight pre,
+.markdown-body pre {
+ padding: 16px;
+ overflow: auto;
+ font-size: 85%;
+ line-height: 1.45;
+ color: var(--color-fg-default);
+ background-color: var(--color-canvas-subtle);
+ border-radius: 6px;
+}
+
+.markdown-body pre code,
+.markdown-body pre tt {
+ display: inline;
+ max-width: auto;
+ padding: 0;
+ margin: 0;
+ overflow: visible;
+ line-height: inherit;
+ word-wrap: normal;
+ background-color: transparent;
+ border: 0;
+}
+
+.markdown-body .csv-data td,
+.markdown-body .csv-data th {
+ padding: 5px;
+ overflow: hidden;
+ font-size: 12px;
+ line-height: 1;
+ text-align: left;
+ white-space: nowrap;
+}
+
+.markdown-body .csv-data .blob-num {
+ padding: 10px 8px 9px;
+ text-align: right;
+ background: var(--color-canvas-default);
+ border: 0;
+}
+
+.markdown-body .csv-data tr {
+ border-top: 0;
+}
+
+.markdown-body .csv-data th {
+ font-weight: var(--base-text-weight-semibold, 600);
+ background: var(--color-canvas-subtle);
+ border-top: 0;
+}
+
+.markdown-body [data-footnote-ref]::before {
+ content: "[";
+}
+
+.markdown-body [data-footnote-ref]::after {
+ content: "]";
+}
+
+.markdown-body .footnotes {
+ font-size: 12px;
+ color: var(--color-fg-muted);
+ border-top: 1px solid var(--color-border-default);
+}
+
+.markdown-body .footnotes ol {
+ padding-left: 16px;
+}
+
+.markdown-body .footnotes ol ul {
+ display: inline-block;
+ padding-left: 16px;
+ margin-top: 16px;
+}
+
+.markdown-body .footnotes li {
+ position: relative;
+}
+
+.markdown-body .footnotes li:target::before {
+ position: absolute;
+ top: -8px;
+ right: -8px;
+ bottom: -8px;
+ left: -24px;
+ pointer-events: none;
+ content: "";
+ border: 2px solid var(--color-accent-emphasis);
+ border-radius: 6px;
+}
+
+.markdown-body .footnotes li:target {
+ color: var(--color-fg-default);
+}
+
+.markdown-body .footnotes .data-footnote-backref g-emoji {
+ font-family: monospace;
+}
+
+.markdown-body .pl-c {
+ color: var(--color-prettylights-syntax-comment);
+}
+
+.markdown-body .pl-c1,
+.markdown-body .pl-s .pl-v {
+ color: var(--color-prettylights-syntax-constant);
+}
+
+.markdown-body .pl-e,
+.markdown-body .pl-en {
+ color: var(--color-prettylights-syntax-entity);
+}
+
+.markdown-body .pl-smi,
+.markdown-body .pl-s .pl-s1 {
+ color: var(--color-prettylights-syntax-storage-modifier-import);
+}
+
+.markdown-body .pl-ent {
+ color: var(--color-prettylights-syntax-entity-tag);
+}
+
+.markdown-body .pl-k {
+ color: var(--color-prettylights-syntax-keyword);
+}
+
+.markdown-body .pl-s,
+.markdown-body .pl-pds,
+.markdown-body .pl-s .pl-pse .pl-s1,
+.markdown-body .pl-sr,
+.markdown-body .pl-sr .pl-cce,
+.markdown-body .pl-sr .pl-sre,
+.markdown-body .pl-sr .pl-sra {
+ color: var(--color-prettylights-syntax-string);
+}
+
+.markdown-body .pl-v,
+.markdown-body .pl-smw {
+ color: var(--color-prettylights-syntax-variable);
+}
+
+.markdown-body .pl-bu {
+ color: var(--color-prettylights-syntax-brackethighlighter-unmatched);
+}
+
+.markdown-body .pl-ii {
+ color: var(--color-prettylights-syntax-invalid-illegal-text);
+ background-color: var(--color-prettylights-syntax-invalid-illegal-bg);
+}
+
+.markdown-body .pl-c2 {
+ color: var(--color-prettylights-syntax-carriage-return-text);
+ background-color: var(--color-prettylights-syntax-carriage-return-bg);
+}
+
+.markdown-body .pl-sr .pl-cce {
+ font-weight: bold;
+ color: var(--color-prettylights-syntax-string-regexp);
+}
+
+.markdown-body .pl-ml {
+ color: var(--color-prettylights-syntax-markup-list);
+}
+
+.markdown-body .pl-mh,
+.markdown-body .pl-mh .pl-en,
+.markdown-body .pl-ms {
+ font-weight: bold;
+ color: var(--color-prettylights-syntax-markup-heading);
+}
+
+.markdown-body .pl-mi {
+ font-style: italic;
+ color: var(--color-prettylights-syntax-markup-italic);
+}
+
+.markdown-body .pl-mb {
+ font-weight: bold;
+ color: var(--color-prettylights-syntax-markup-bold);
+}
+
+.markdown-body .pl-md {
+ color: var(--color-prettylights-syntax-markup-deleted-text);
+ background-color: var(--color-prettylights-syntax-markup-deleted-bg);
+}
+
+.markdown-body .pl-mi1 {
+ color: var(--color-prettylights-syntax-markup-inserted-text);
+ background-color: var(--color-prettylights-syntax-markup-inserted-bg);
+}
+
+.markdown-body .pl-mc {
+ color: var(--color-prettylights-syntax-markup-changed-text);
+ background-color: var(--color-prettylights-syntax-markup-changed-bg);
+}
+
+.markdown-body .pl-mi2 {
+ color: var(--color-prettylights-syntax-markup-ignored-text);
+ background-color: var(--color-prettylights-syntax-markup-ignored-bg);
+}
+
+.markdown-body .pl-mdr {
+ font-weight: bold;
+ color: var(--color-prettylights-syntax-meta-diff-range);
+}
+
+.markdown-body .pl-ba {
+ color: var(--color-prettylights-syntax-brackethighlighter-angle);
+}
+
+.markdown-body .pl-sg {
+ color: var(--color-prettylights-syntax-sublimelinter-gutter-mark);
+}
+
+.markdown-body .pl-corl {
+ text-decoration: underline;
+ color: var(--color-prettylights-syntax-constant-other-reference-link);
+}
+
+.markdown-body g-emoji {
+ display: inline-block;
+ min-width: 1ch;
+ font-family: "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol";
+ font-size: 1em;
+ font-style: normal !important;
+ font-weight: var(--base-text-weight-normal, 400);
+ line-height: 1;
+ vertical-align: -0.075em;
+}
+
+.markdown-body g-emoji img {
+ width: 1em;
+ height: 1em;
+}
+
+.markdown-body .task-list-item {
+ list-style-type: none;
+}
+
+.markdown-body .task-list-item label {
+ font-weight: var(--base-text-weight-normal, 400);
+}
+
+.markdown-body .task-list-item.enabled label {
+ cursor: pointer;
+}
+
+.markdown-body .task-list-item+.task-list-item {
+ margin-top: 4px;
+}
+
+.markdown-body .task-list-item .handle {
+ display: none;
+}
+
+.markdown-body .task-list-item-checkbox {
+ margin: 0 .2em .25em -1.4em;
+ vertical-align: middle;
+}
+
+.markdown-body .contains-task-list:dir(rtl) .task-list-item-checkbox {
+ margin: 0 -1.6em .25em .2em;
+}
+
+.markdown-body .contains-task-list {
+ position: relative;
+}
+
+.markdown-body .contains-task-list:hover .task-list-item-convert-container,
+.markdown-body .contains-task-list:focus-within .task-list-item-convert-container {
+ display: block;
+ width: auto;
+ height: 24px;
+ overflow: visible;
+ clip: auto;
+}
+
+.markdown-body ::-webkit-calendar-picker-indicator {
+ filter: invert(50%);
+}
+
+.markdown-body .markdown-alert {
+ padding: var(--base-size-8) var(--base-size-16);
+ margin-bottom: 16px;
+ color: inherit;
+ border-left: .25em solid var(--color-border-default);
+}
+
+.markdown-body .markdown-alert>:first-child {
+ margin-top: 0;
+}
+
+.markdown-body .markdown-alert>:last-child {
+ margin-bottom: 0;
+}
+
+.markdown-body .markdown-alert .markdown-alert-title {
+ display: flex;
+ font-weight: var(--base-text-weight-medium, 500);
+ align-items: center;
+ line-height: 1;
+}
+
+.markdown-body .markdown-alert.markdown-alert-note {
+ border-left-color: var(--color-accent-emphasis);
+}
+
+.markdown-body .markdown-alert.markdown-alert-note .markdown-alert-title {
+ color: var(--color-accent-fg);
+}
+
+.markdown-body .markdown-alert.markdown-alert-important {
+ border-left-color: var(--color-done-emphasis);
+}
+
+.markdown-body .markdown-alert.markdown-alert-important .markdown-alert-title {
+ color: var(--color-done-fg);
+}
+
+.markdown-body .markdown-alert.markdown-alert-warning {
+ border-left-color: var(--color-attention-emphasis);
+}
+
+.markdown-body .markdown-alert.markdown-alert-warning .markdown-alert-title {
+ color: var(--color-attention-fg);
+}
+
+.markdown-body .markdown-alert.markdown-alert-tip {
+ border-left-color: var(--color-success-emphasis);
+}
+
+.markdown-body .markdown-alert.markdown-alert-tip .markdown-alert-title {
+ color: var(--color-success-fg);
+}
+
+.markdown-body .markdown-alert.markdown-alert-caution {
+ border-left-color: var(--color-danger-emphasis);
+}
+
+.markdown-body .markdown-alert.markdown-alert-caution .markdown-alert-title {
+ color: var(--color-danger-fg);
+}
\ No newline at end of file
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..d1154b4
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,1250 @@
+ Search Code By Comment
\ No newline at end of file
diff --git a/docs/metadata.json b/docs/metadata.json
new file mode 100644
index 0000000..119c7aa
--- /dev/null
+++ b/docs/metadata.json
@@ -0,0 +1,125 @@
+{
+ "url": {
+ "full": "https://github.com/openai/Video-Pre-Training",
+ "partial": "openai/Video-Pre-Training"
+ },
+ "file_mapping": {
+ "0": {
+ "filepath": "/README.md",
+ "entry_id": 0,
+ "language_id": "plain-text"
+ },
+ "1": {
+ "filepath": "/agent.py",
+ "entry_id": 54,
+ "language_id": "python"
+ },
+ "2": {
+ "filepath": "/behavioural_cloning.py",
+ "entry_id": 72,
+ "language_id": "python"
+ },
+ "3": {
+ "filepath": "/data_loader.py",
+ "entry_id": 86,
+ "language_id": "python"
+ },
+ "4": {
+ "filepath": "/inverse_dynamics_model.py",
+ "entry_id": 106,
+ "language_id": "python"
+ },
+ "5": {
+ "filepath": "/lib/action_head.py",
+ "entry_id": 116,
+ "language_id": "python"
+ },
+ "6": {
+ "filepath": "/lib/action_mapping.py",
+ "entry_id": 140,
+ "language_id": "python"
+ },
+ "7": {
+ "filepath": "/lib/actions.py",
+ "entry_id": 164,
+ "language_id": "python"
+ },
+ "8": {
+ "filepath": "/lib/impala_cnn.py",
+ "entry_id": 178,
+ "language_id": "python"
+ },
+ "9": {
+ "filepath": "/lib/masked_attention.py",
+ "entry_id": 192,
+ "language_id": "python"
+ },
+ "10": {
+ "filepath": "/lib/minecraft_util.py",
+ "entry_id": 210,
+ "language_id": "python"
+ },
+ "11": {
+ "filepath": "/lib/misc.py",
+ "entry_id": 220,
+ "language_id": "python"
+ },
+ "12": {
+ "filepath": "/lib/mlp.py",
+ "entry_id": 238,
+ "language_id": "python"
+ },
+ "13": {
+ "filepath": "/lib/normalize_ewma.py",
+ "entry_id": 242,
+ "language_id": "python"
+ },
+ "14": {
+ "filepath": "/lib/policy.py",
+ "entry_id": 250,
+ "language_id": "python"
+ },
+ "15": {
+ "filepath": "/lib/scaled_mse_head.py",
+ "entry_id": 284,
+ "language_id": "python"
+ },
+ "16": {
+ "filepath": "/lib/torch_util.py",
+ "entry_id": 290,
+ "language_id": "python"
+ },
+ "17": {
+ "filepath": "/lib/tree_util.py",
+ "entry_id": 304,
+ "language_id": "python"
+ },
+ "18": {
+ "filepath": "/lib/util.py",
+ "entry_id": 324,
+ "language_id": "python"
+ },
+ "19": {
+ "filepath": "/lib/xf.py",
+ "entry_id": 344,
+ "language_id": "python"
+ },
+ "20": {
+ "filepath": "/requirements.txt",
+ "entry_id": 376,
+ "language_id": "plain-text"
+ },
+ "21": {
+ "filepath": "/run_agent.py",
+ "entry_id": 380,
+ "language_id": "python"
+ },
+ "22": {
+ "filepath": "/run_inverse_dynamics_model.py",
+ "entry_id": 386,
+ "language_id": "python"
+ }
+ },
+ "project_name": "Video-Pre-Training",
+ "split_count": 5
+}
\ No newline at end of file
diff --git a/docs/metadata_title.json b/docs/metadata_title.json
new file mode 100644
index 0000000..0103dd2
--- /dev/null
+++ b/docs/metadata_title.json
@@ -0,0 +1 @@
+{"split_count": 1}
\ No newline at end of file
diff --git a/docs/sitemap.xml b/docs/sitemap.xml
new file mode 100644
index 0000000..7459304
--- /dev/null
+++ b/docs/sitemap.xml
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/README.md</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/agent.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/behavioural_cloning.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/data_loader.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/inverse_dynamics_model.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/action_head.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/action_mapping.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/actions.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/impala_cnn.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/masked_attention.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/minecraft_util.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/misc.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/mlp.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/normalize_ewma.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/policy.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/scaled_mse_head.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/torch_util.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/tree_util.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/util.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/lib/xf.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/requirements.txt</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/run_agent.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training?q=/run_inverse_dynamics_model.py</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://james4ever0.github.io/Video-Pre-Training/tree.html?full=true</loc>
+    <lastmod>2023-12-28T09:21:02+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+</urlset>
\ No newline at end of file
diff --git a/docs/src/README.md b/docs/src/README.md
new file mode 100644
index 0000000..2bd205b
--- /dev/null
+++ b/docs/src/README.md
@@ -0,0 +1,497 @@
+
+
+# Video-Pre-Training
+Video PreTraining (VPT): Learning to Act by Watching Unlabeled Online Videos
+
+
+> :page_facing_up: [Read Paper](https://cdn.openai.com/vpt/Paper.pdf) \
+ :mega: [Blog Post](https://openai.com/blog/vpt) \
+ :space_invader: [MineRL Environment](https://github.com/minerllabs/minerl) (note version 1.0+ required) \
+ :checkered_flag: [MineRL BASALT Competition](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition)
+
+
+# Running agent models
+
+Install the prerequisites for [MineRL](https://minerl.readthedocs.io/en/latest/tutorials/index.html).
+Then install requirements with:
+
+```
+pip install git+https://github.com/minerllabs/minerl
+pip install -r requirements.txt
+```
+
+To run the code, call
+
+```
+python run_agent.py --model [path to .model file] --weights [path to .weight file]
+```
+
+After loading up, you should see a window of the agent playing Minecraft.
+
+
+
+# Agent Model Zoo
+Below are the model files and weights files for various pre-trained Minecraft models.
+The 1x, 2x and 3x model files correspond to their respective model widths.
+
+* [:arrow_down: 1x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.model)
+* [:arrow_down: 2x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/2x.model)
+* [:arrow_down: 3x Model](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.model)
+
+### Demonstration Only - Behavioral Cloning
+These models are trained on video demonstrations of humans playing Minecraft
+using behavioral cloning (BC) and are more general than later models which
+use reinforcement learning (RL) to further optimize the policy.
+Foundational models are trained across all videos in a single training run,
+while the house and early-game models further fine-tune the foundational model
+of the corresponding width using either the house-building contractor data or the
+early-game video subset. See the paper linked above for more details.
+
+#### Foundational Model :chart_with_upwards_trend:
+ * [:arrow_down: 1x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-1x.weights)
+ * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-2x.weights)
+ * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/foundation-model-3x.weights)
+
+#### Fine-Tuned from House :chart_with_upwards_trend:
+ * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-house-3x.weights)
+
+#### Fine-Tuned from Early Game :chart_with_upwards_trend:
+ * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-2x.weights)
+ * [:arrow_down: 3x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/bc-early-game-3x.weights)
+
+### Models With Environment Interactions
+These models further refine the above demonstration-based models with a reward
+function targeted at obtaining diamond pickaxes. While less general than the behavioral
+cloning models, these models have the benefit of interacting with the environment
+using a reward function and excel at progressing through the tech tree quickly.
+See the paper for more information
+on how they were trained and the exact reward schedule.
+
+#### RL from Foundation :chart_with_upwards_trend:
+ * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-foundation-2x.weights)
+
+#### RL from House :chart_with_upwards_trend:
+ * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-house-2x.weights)
+
+#### RL from Early Game :chart_with_upwards_trend:
+ * [:arrow_down: 2x Width Weights](https://openaipublic.blob.core.windows.net/minecraft-rl/models/rl-from-early-game-2x.weights)
+
+# Running Inverse Dynamics Model (IDM)
+
+The IDM aims to predict what actions the player is taking in a video recording.
+
+Setup:
+* Install requirements: `pip install -r requirements.txt`
+* Download the IDM model [.model :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.model) and [.weight :arrow_down:](https://openaipublic.blob.core.windows.net/minecraft-rl/idm/4x_idm.weights) files
+* For demonstration purposes, you can use the contractor recordings shared below. For this demo we use
+ [this .mp4](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4)
+ and [this associated actions file (.jsonl)](https://openaipublic.blob.core.windows.net/minecraft-rl/data/10.0/cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl).
+
+To run the model with the above files placed in the root directory of this code:
+```
+python run_inverse_dynamics_model.py --weights 4x_idm.weights --model 4x_idm.model --video-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.mp4 --jsonl-path cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl
+```
+
+A window should pop up which shows the video frame-by-frame, showing the predicted and true (recorded) actions side-by-side on the left.
+
+Note that `run_inverse_dynamics_model.py` is designed to be a demo of the IDM, not code to put it into practice.
+
+# Using behavioural cloning to fine-tune the models
+
+**Disclaimer:** This code is a rough demonstration only and not an exact recreation of what the original VPT paper did (but it contains some preprocessing steps you want to be aware of)! As such, do not expect to replicate the original experiments with this code. This code has been designed to be runnable on consumer hardware (e.g., 8GB of VRAM).
+
+Setup:
+* Install requirements: `pip install -r requirements.txt`
+* Download `.weights` and `.model` file for model you want to fine-tune.
+* Download contractor data (below) and place the `.mp4` and `.jsonl` files in the same directory (e.g., `data`). With default settings, you need at least 12 recordings.
+
+If you downloaded the "1x Width" models and placed some data under the `data` directory, you can perform fine-tuning with
+
+```
+python behavioural_cloning.py --data-dir data --in-model foundation-model-1x.model --in-weights foundation-model-1x.weights --out-weights finetuned-1x.weights
+```
+
+You can then use `finetuned-1x.weights` when running the agent. You can change the training settings at the top of `behavioural_cloning.py`.
+
+Major limitations:
+- Only trains a single step at a time, i.e., errors are not propagated through timesteps.
+- Computes gradients one sample at a time to keep memory use low, which also slows down the code.
+
+# Contractor Demonstrations
+
+### Versions
+Over the course of the project we requested various demonstrations from contractors,
+which we release as index files below. In general, major recorder versions correspond to a new
+prompt or recording feature, while bug fixes were represented as minor version changes.
+However, for some
+recorder versions we asked contractors to change their username when recording particular
+modalities. Also, as contractors asked questions internally, a clarification given to one contractor may
+have resulted in a behavioral change in another contractor. It is intractable to share every contractor's
+view for each version, but we've shared the prompts and major clarifications for each recorder
+version where the task changed significantly.
+
+
+ Initial Prompt
+
+ We are collecting data for training AI models in Minecraft. You'll need to install java, download the modified version of minecraft (that collects and uploads your play data), and play minecraft survival mode! Paid per hour of gameplay. Prior experience in minecraft is not necessary. We do not collect any data that is unrelated to minecraft from your computer.
+
+
+
+The following is a list of the available versions:
+
+* **6.x** Core recorder features subject to change [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_6xx_Jun_29.json)
+ * 6.9 First feature complete recorder version
+ * 6.10 Fixes mouse scaling on Mac when gui is open
+ * 6.11 Tracks the hotbar slot
+ * 6.13 Sprinting, swap-hands, ... (see commits below)
+
+ Commits
+
+ * improve replays that are cut in the middle of gui; working on riding boats / replays cut in the middle of a run
+ * improve replays by adding dwheel action etc, also, loosen up replay tolerances
+ * opencv version bump
+ * add swap hands, and recording of the step timestamp
+ * implement replaying from running and sprinting and tests
+ * do not record sprinting (can use stats for that)
+ * check for mouse button number, ignore >2
+ * handle the errors when mouse / keyboard are recorded as null
+
+
+* **7.x** Prompt changes [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_7xx_Apr_6.json)
+ * 7.6 Bump version for internal tracking
+
+ Additional ask to contractors
+
+ Right now, early game data is especially valuable to us. As such, we request that at least half of the data you upload is from the first 30 minutes of the game. This means that, for every hour of gameplay you spend in an older world, we ask you to play two sessions in which you create a new world and play for 30 minutes. You can play for longer in these worlds, but only the first 30 minutes counts as early game data.
+
+
+* **8.x** :clipboard: House Building from Scratch Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_8xx_Jun_29.json)
+
+ Changes and Prompt
+
+ Hi all! Thank you for your hard work so far.
+
+ This week we would like to have you all collect data on a specific task.
+
+ This comes with a new recorder version 8.0 which you will need to update your recording script to download.
+
+ This week we would like you to use a new world each time you play, so loading existing worlds is disabled.
+
+ The new task is as follows:
+
+ Starting in a new world, build a simple house in 10-15 minutes. This corresponds to one day and a bit of the night. Please use primarily wood, dirt, and sand, as well as crafted wood items such as doors, fences, ect. in constructing your house. Avoid using difficult items such as stone. Aside from those constraints, you may decorate the structure you build as you wish. It does not need to have any specific furniture. For example, it is OK if there is no bed in your house. If you have not finished the house by the sunrise (20 minutes) please exit and continue to another demonstration. Please continue to narrate what you are doing while completing this task.
+
+ Since you will be unable to resume building after exiting Minecraft or going back to the main menu, you must finish these demonstrations in one session. Pausing via the menu is still supported. If you want to view your creations later, they will be saved locally so you can look at them in your own time. We may use these save files in a future task so if you have space, please leave the save files titled “build-house-15-min-“.
+
+ For this week try to avoid all cobblestone / stone / granite
+
+ For this week we just want simple houses without sleeping. If 10 minutes is too short, let us know and we can think of how to adjust!
+
+ Stone tools are ok but I think you may run-out of time
+
+ Changes:
+ * Timer ends episode after 10 realtime minutes
+ * Worlds are named: `"build-house-15-min-" + Math.abs(random.nextInt());`
+
+
+
+ * Note this version introduces a 10-minute timer that ends the episode. It
+ occasionally cut experiments short and was fixed in 9.1
+ * 8.0 Simple House
+ * 8.2 Update upload script
+* **9.x** :clipboard: House Building from Random Starting Materials Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_9xx_Jun_29.json)
+
+ Changes and Prompt
+
+ You now will have 10 minutes to use the provided resources to build your house / home / or structure. In this version, the experiment will time out after 10 minutes if you are not complete so don't be alarmed if that happens, it is intentional.
+
+ No need to use up all the resources! It's ok to collect a few things but spend the majority of the time placing blocks (the act of placing seems to be harder to learn)
+
+ Changes:
+ * Worlds are named: `"design-house-10-min-" + Math.abs(random.nextInt());`
+ * Starting inventory given by code below
+
+
+
+ Random Starting Inventory Code
+
+ ```java
+ Random random = new Random();
+ List<ItemStack> hotbar = new ArrayList<>();
+ List<ItemStack> inventory = new ArrayList<>();
+
+ // Ensure we give the player the basic tools in their hot bar
+ hotbar.add(new ItemStack(Items.STONE_AXE));
+ hotbar.add(new ItemStack(Items.STONE_PICKAXE));
+ hotbar.add(new ItemStack(Items.STONE_SHOVEL));
+ hotbar.add(new ItemStack(Items.CRAFTING_TABLE));
+
+ // Add some random items to the player hotbar as well
+ addToList(hotbar, inventory, Items.TORCH, random.nextInt(16) * 2 + 2);
+
+ // Next add main building blocks
+ if (random.nextFloat() < 0.7) {
+ addToList(hotbar, inventory, Items.OAK_FENCE_GATE, random.nextInt(5));
+ addToList(hotbar, inventory, Items.OAK_FENCE, random.nextInt(5) * 64);
+ addToList(hotbar, inventory, Items.OAK_DOOR, random.nextInt(5));
+ addToList(hotbar, inventory, Items.OAK_TRAPDOOR, random.nextInt(2) * 2);
+ addToList(hotbar, inventory, Items.OAK_PLANKS, random.nextInt(3) * 64 + 128);
+ addToList(hotbar, inventory, Items.OAK_SLAB, random.nextInt(3) * 64);
+ addToList(hotbar, inventory, Items.OAK_STAIRS, random.nextInt(3) * 64);
+ addToList(hotbar, inventory, Items.OAK_LOG, random.nextInt(2) * 32);
+ addToList(hotbar, inventory, Items.OAK_PRESSURE_PLATE, random.nextInt(5));
+ } else {
+ addToList(hotbar, inventory, Items.BIRCH_FENCE_GATE, random.nextInt(5));
+ addToList(hotbar, inventory, Items.BIRCH_FENCE, random.nextInt(5) * 64);
+ addToList(hotbar, inventory, Items.BIRCH_DOOR, random.nextInt(5));
+ addToList(hotbar, inventory, Items.BIRCH_TRAPDOOR, random.nextInt(2) * 2);
+ addToList(hotbar, inventory, Items.BIRCH_PLANKS, random.nextInt(3) * 64 + 128);
+ addToList(hotbar, inventory, Items.BIRCH_SLAB, random.nextInt(3) * 64);
+ addToList(hotbar, inventory, Items.BIRCH_STAIRS, random.nextInt(3) * 64);
+ addToList(hotbar, inventory, Items.BIRCH_LOG, random.nextInt(2) * 32);
+ addToList(hotbar, inventory, Items.BIRCH_PRESSURE_PLATE, random.nextInt(5));
+ }
+
+ // Now add some random decoration items to the player inventory
+ addToList(hotbar, inventory, Items.CHEST, random.nextInt(3));
+ addToList(hotbar, inventory, Items.FURNACE, random.nextInt(2) + 1);
+ addToList(hotbar, inventory, Items.GLASS_PANE, random.nextInt(5) * 4);
+ addToList(hotbar, inventory, Items.WHITE_BED, (int) (random.nextFloat() + 0.2)); // Bed 20% of the time
+ addToList(hotbar, inventory, Items.PAINTING, (int) (random.nextFloat() + 0.1)); // Painting 10% of the time
+ addToList(hotbar, inventory, Items.FLOWER_POT, (int) (random.nextFloat() + 0.1) * 4); // 4 Flower pots 10% of the time
+ addToList(hotbar, inventory, Items.OXEYE_DAISY, (int) (random.nextFloat() + 0.1) * 4); // 4 Oxeye daisies 10% of the time
+ addToList(hotbar, inventory, Items.POPPY, (int) (random.nextFloat() + 0.1) * 4); // 4 Poppies 10% of the time
+ addToList(hotbar, inventory, Items.SUNFLOWER, (int) (random.nextFloat() + 0.1) * 4); // 4 Sunflowers 10% of the time
+
+ // Shuffle the hotbar slots and inventory slots
+ Collections.shuffle(hotbar);
+ Collections.shuffle(inventory);
+
+ // Give the player the items
+ this.mc.getIntegratedServer().getPlayerList().getPlayers().forEach(p -> {
+ if (p.getUniqueID().equals(this.getUniqueID())) {
+ hotbar.forEach(p.inventory::addItemStackToInventory);
+ inventory.forEach(p.inventory::addItemStackToInventory);
+ }
+ });
+ ```
+
+
+
+ * 9.0 First version
+ * 9.1 Fixed timer bug
+* **10.0** :clipboard: Obtain Diamond Pickaxe Task [:arrow_down: index](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/all_10xx_Jun_29.json)
+
+ Changes and Prompt
+ Prompt:
+
+ For this new task we have given you 20 minutes to craft a diamond pickaxe. We ask that you do not try to search for villages or other ways of getting diamonds, but if you are spawned in view of one, or happen to fall into a cave structure feel free to explore it for diamonds.
+ If 20 min is not enough that is OK. It will happen on some seeds because of bad luck. Please do not use glitches to find the diamonds.
+
+ Changes:
+ * change to 20 minute time limit
+ * _don't count gui time as part of the time limit_
+ * Worlds are named `"collect-diamond-pickaxe-15min-" + Math.abs(random.nextInt());`
+
+
+
+
+Sometimes we asked the contractors to signify other tasks by means other than changing the version. This
+primarily occurred in versions 6 and 7, as 8, 9 and 10 are all task-specific.
+
+
+Prompt to contractors (click to show)
+Another request about additional time - please use some of it to chop trees. Specifically, please start the recorder by adding --username treechop argument to the script (i.e. use play --username treechop on windows, ./play.sh --username treechop on osx/linux), and spend some time chopping trees! Getting wooden or stone tools is ok, but please spend the majority of the time with username treechop specifically chopping. I did it myself for about 15 minutes, and it does get boring pretty quickly, so I don't expect you to do it all the time, but please do at least a little bit of chopping. Feel free to play normally the rest of the time (but please restart without --username treechop argument when you are not chopping)
+However, it is preferable that you start a new world though, and use only the tools that are easily obtainable in that world. I'll see what I can do about getting player an iron axe - that sounds reasonable, and should not be hard, but will require a code update.
+
+
+### Environment
+We restrict the contractors to playing Minecraft in windowed mode at 720p, which we downsample at 20Hz to 360p
+to minimize space. We also disabled the options screen to prevent the contractor from
+changing things such as brightness or rendering options. We ask contractors not to press keys
+such as F3, which shows a debug overlay; however, some contractors may still do this.
+
+
+### Data format
+
+Demonstrations are broken up into segments of up to 5 minutes, each consisting of a series of
+compressed screen observations, actions, environment statistics, and a checkpoint
+save file from the start of the segment. Each relative path in the index will
+have all the files for that given segment; however, if a file was dropped while
+uploading, the corresponding relative path is not included in the index, so
+there may be missing chunks from otherwise continuous demonstrations.
+
+Index files are provided for each version as a json file:
+```json
+{
+ "basedir": "https://openaipublic.blob.core.windows.net/data/",
+ "relpaths": [
+ "8.0/cheeky-cornflower-setter-74ae6c2eae2e-20220315-122354",
+ ...
+ ]
+}
+```
+Relative paths follow the following format:
+* `/---`
+
+> Note that due to network errors, some segments may be missing from otherwise
+continuous demonstrations.
+
+Your data loader can then find the following files for each relative path (see the sketch below):
+* Video observation: `<basedir>/<relpath>.mp4`
+* Action file: `<basedir>/<relpath>.jsonl`
+* Options file: `<basedir>/<relpath>-options.json`
+* Checkpoint save file: `<basedir>/<relpath>.zip`
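+
+A minimal sketch of resolving these files from a downloaded index (the local index file name below is just an example, here the 7.x index saved locally):
+
+```python
+import json
+
+with open("all_7xx_Apr_6.json") as f:
+    index = json.load(f)
+
+basedir = index["basedir"].rstrip("/")
+for relpath in index["relpaths"]:
+    video_url = f"{basedir}/{relpath}.mp4"
+    actions_url = f"{basedir}/{relpath}.jsonl"
+    print(video_url, actions_url)
+```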
+
+The action file is **not** a single valid JSON object: each line in the
+action file is an individual action dictionary (see the parsing sketch after the example below).
+
+For v7.x, the actions are in the form
+```json
+{
+ "mouse": {
+ "x": 274.0,
+ "y": 338.0,
+ "dx": 0.0,
+ "dy": 0.0,
+ "scaledX": -366.0,
+ "scaledY": -22.0,
+ "dwheel": 0.0,
+ "buttons": [],
+ "newButtons": []
+ },
+ "keyboard": {
+ "keys": [
+ "key.keyboard.a",
+ "key.keyboard.s"
+ ],
+ "newKeys": [],
+ "chars": ""
+ },
+ "isGuiOpen": false,
+ "isGuiInventory": false,
+ "hotbar": 4,
+ "yaw": -112.35006,
+ "pitch": 8.099996,
+ "xpos": 841.364694513396,
+ "ypos": 63.0,
+ "zpos": 24.956354839537802,
+ "tick": 0,
+ "milli": 1649575088006,
+ "inventory": [
+ {
+ "type": "oak_door",
+ "quantity": 3
+ },
+ {
+ "type": "oak_planks",
+ "quantity": 59
+ },
+ {
+ "type": "stone_pickaxe",
+ "quantity": 1
+ },
+ {
+ "type": "oak_planks",
+ "quantity": 64
+ }
+ ],
+ "serverTick": 6001,
+ "serverTickDurationMs": 36.3466,
+ "stats": {
+ "minecraft.custom:minecraft.jump": 4,
+ "minecraft.custom:minecraft.time_since_rest": 5999,
+ "minecraft.custom:minecraft.play_one_minute": 5999,
+ "minecraft.custom:minecraft.time_since_death": 5999,
+ "minecraft.custom:minecraft.walk_one_cm": 7554,
+ "minecraft.use_item:minecraft.oak_planks": 5,
+ "minecraft.custom:minecraft.fall_one_cm": 269,
+ "minecraft.use_item:minecraft.glass_pane": 3
+ }
+}
+```
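+
+A small parsing sketch (using the recording referenced in the IDM demo above, and assuming the v7.x schema shown here):
+
+```python
+import json
+
+actions = []
+with open("cheeky-cornflower-setter-02e496ce4abb-20220421-092639.jsonl") as f:
+    for line in f:
+        line = line.strip()
+        if line:
+            actions.append(json.loads(line))
+
+# e.g. list which keys and mouse buttons were newly pressed on the first few ticks
+for step in actions[:10]:
+    print(step["tick"], step["keyboard"]["newKeys"], step["mouse"]["newButtons"])
+```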
+
+# BASALT 2022 dataset
+
+We also collected a dataset of demonstrations for the [MineRL BASALT 2022](https://www.aicrowd.com/challenges/neurips-2022-minerl-basalt-competition) competition, with around 150GB of data per task.
+
+**Note**: To avoid confusion with the competition rules, the action files (.jsonl) have been stripped of information that is not allowed in the competition. We will upload the unmodified dataset after the competition ends.
+
+* **FindCave** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/find-cave-Jul-28.json)
+ *
+ Prompt to contractors (click to show)
+
+ ```
+ Look around for a cave. When you are inside one, quit the game by opening main menu and pressing "Save and Quit To Title".
+ You are not allowed to dig down from the surface to find a cave.
+
+ Timelimit: 3 minutes.
+ Example recordings: https://www.youtube.com/watch?v=TclP_ozH-eg
+ ```
+
+* **MakeWaterfall** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/waterfall-Jul-28.json)
+ *
+ Prompt to contractors (click to show)
+
+ ```
+ After spawning in a mountainous area with a water bucket and various tools, build a beautiful waterfall and then reposition yourself to “take a scenic picture” of the same waterfall, and then quit the game by opening the menu and selecting "Save and Quit to Title"
+
+ Timelimit: 5 minutes.
+ Example recordings: https://youtu.be/NONcbS85NLA
+ ```
+
+* **MakeVillageAnimalPen** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/pen-animals-Jul-28.json)
+ *
+ Prompt to contractors (click to show)
+
+ ```
+ After spawning in a village, build an animal pen next to one of the houses in a village. Use your fence posts to build one animal pen that contains at least two of the same animal. (You are only allowed to pen chickens, cows, pigs, sheep or rabbits.) There should be at least one gate that allows players to enter and exit easily. The animal pen should not contain more than one type of animal. (You may kill any extra types of animals that accidentally got into the pen.) Don’t harm the village.
+ After you are done, quit the game by opening the menu and pressing "Save and Quit to Title".
+
+ You may need to terraform the area around a house to build a pen. When we say not to harm the village, examples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers. Animal pens must have a single type of animal: pigs, cows, sheep, chicken or rabbits.
+
+ The food items can be used to lure in the animals: if you hold seeds in your hand, this attracts nearby chickens to you, for example.
+
+ Timelimit: 5 minutes.
+ Example recordings: https://youtu.be/SLO7sep7BO8
+ ```
+
+* **BuildVillageHouse** [:arrow_down: index file](https://openaipublic.blob.core.windows.net/minecraft-rl/snapshots/build-house-Jul-28.json)
+ *
+ Prompt to contractors (click to show)
+
+ ```
+ Taking advantage of the items in your inventory, build a new house in the style of the village (random biome), in an appropriate location (e.g. next to the path through the village), without harming the village in the process.
+ Then give a brief tour of the house (i.e. spin around slowly such that all of the walls and the roof are visible).
+
+ * You start with a stone pickaxe and a stone axe, and various building blocks. It’s okay to break items that you misplaced (e.g. use the stone pickaxe to break cobblestone blocks).
+ * You are allowed to craft new blocks.
+
+ Please spend less than ten minutes constructing your house.
+
+ You don’t need to copy another house in the village exactly (in fact, we’re more interested in having slight deviations, while keeping the same "style"). You may need to terraform the area to make space for a new house.
+ When we say not to harm the village, examples include taking animals from existing pens, damaging existing houses or farms, and attacking villagers.
+
+ After you are done, quit the game by opening the menu and pressing "Save and Quit to Title".
+
+ Timelimit: 12 minutes.
+ Example recordings: https://youtu.be/WeVqQN96V_g
+ ```
+
+
+
+
+# Contribution
+This was a large effort by a dedicated team at OpenAI:
+[Bowen Baker](https://github.com/bowenbaker),
+[Ilge Akkaya](https://github.com/ilge),
+[Peter Zhokhov](https://github.com/pzhokhov),
+[Joost Huizinga](https://github.com/JoostHuizinga),
+[Jie Tang](https://github.com/jietang),
+[Adrien Ecoffet](https://github.com/AdrienLE),
+[Brandon Houghton](https://github.com/brandonhoughton),
+[Raul Sampedro](https://github.com/samraul),
+Jeff Clune.
+The code here represents a minimal version of our model code which was
+prepared by [Anssi Kanervisto](https://github.com/miffyli) and others so that these models could be used as
+part of the MineRL BASALT competition.
diff --git a/docs/src/agent.py b/docs/src/agent.py
new file mode 100644
index 0000000..12c98e2
--- /dev/null
+++ b/docs/src/agent.py
@@ -0,0 +1,206 @@
+import numpy as np
+import torch as th
+import cv2
+from gym3.types import DictType
+from gym import spaces
+
+from lib.action_mapping import CameraHierarchicalMapping
+from lib.actions import ActionTransformer
+from lib.policy import MinecraftAgentPolicy
+from lib.torch_util import default_device_type, set_default_torch_device
+
+
+# Hardcoded settings
+AGENT_RESOLUTION = (128, 128)
+
+POLICY_KWARGS = dict(
+ attention_heads=16,
+ attention_mask_style="clipped_causal",
+ attention_memory_size=256,
+ diff_mlp_embedding=False,
+ hidsize=2048,
+ img_shape=[128, 128, 3],
+ impala_chans=[16, 32, 32],
+ impala_kwargs={"post_pool_groups": 1},
+ impala_width=8,
+ init_norm_kwargs={"batch_norm": False, "group_norm_groups": 1},
+ n_recurrence_layers=4,
+ only_img_input=True,
+ pointwise_ratio=4,
+ pointwise_use_activation=False,
+ recurrence_is_residual=True,
+ recurrence_type="transformer",
+ timesteps=128,
+ use_pointwise_layer=True,
+ use_pre_lstm_ln=False,
+)
+
+PI_HEAD_KWARGS = dict(temperature=2.0)
+
+ACTION_TRANSFORMER_KWARGS = dict(
+ camera_binsize=2,
+ camera_maxval=10,
+ camera_mu=10,
+ camera_quantization_scheme="mu_law",
+)
+
+ENV_KWARGS = dict(
+ fov_range=[70, 70],
+ frameskip=1,
+ gamma_range=[2, 2],
+ guiscale_range=[1, 1],
+ resolution=[640, 360],
+ cursor_size_range=[16.0, 16.0],
+)
+
+TARGET_ACTION_SPACE = {
+ "ESC": spaces.Discrete(2),
+ "attack": spaces.Discrete(2),
+ "back": spaces.Discrete(2),
+ "camera": spaces.Box(low=-180.0, high=180.0, shape=(2,)),
+ "drop": spaces.Discrete(2),
+ "forward": spaces.Discrete(2),
+ "hotbar.1": spaces.Discrete(2),
+ "hotbar.2": spaces.Discrete(2),
+ "hotbar.3": spaces.Discrete(2),
+ "hotbar.4": spaces.Discrete(2),
+ "hotbar.5": spaces.Discrete(2),
+ "hotbar.6": spaces.Discrete(2),
+ "hotbar.7": spaces.Discrete(2),
+ "hotbar.8": spaces.Discrete(2),
+ "hotbar.9": spaces.Discrete(2),
+ "inventory": spaces.Discrete(2),
+ "jump": spaces.Discrete(2),
+ "left": spaces.Discrete(2),
+ "pickItem": spaces.Discrete(2),
+ "right": spaces.Discrete(2),
+ "sneak": spaces.Discrete(2),
+ "sprint": spaces.Discrete(2),
+ "swapHands": spaces.Discrete(2),
+ "use": spaces.Discrete(2)
+}
+
+
+def validate_env(env):
+ """Check that the MineRL environment is setup correctly, and raise if not"""
+ for key, value in ENV_KWARGS.items():
+ if key == "frameskip":
+ continue
+ if getattr(env.task, key) != value:
+ raise ValueError(f"MineRL environment setting {key} does not match {value}")
+ action_names = set(env.action_space.spaces.keys())
+ if action_names != set(TARGET_ACTION_SPACE.keys()):
+ raise ValueError(f"MineRL action space does not match. Expected actions {set(TARGET_ACTION_SPACE.keys())}")
+
+ for ac_space_name, ac_space_space in TARGET_ACTION_SPACE.items():
+ if env.action_space.spaces[ac_space_name] != ac_space_space:
+ raise ValueError(f"MineRL action space setting {ac_space_name} does not match {ac_space_space}")
+
+
+def resize_image(img, target_resolution):
+ # For your sanity, do not resize with any function other than INTER_LINEAR
+ img = cv2.resize(img, target_resolution, interpolation=cv2.INTER_LINEAR)
+ return img
+
+
+class MineRLAgent:
+ def __init__(self, env, device=None, policy_kwargs=None, pi_head_kwargs=None):
+ validate_env(env)
+
+ if device is None:
+ device = default_device_type()
+ self.device = th.device(device)
+ # Set the default torch device for underlying code as well
+ set_default_torch_device(self.device)
+ self.action_mapper = CameraHierarchicalMapping(n_camera_bins=11)
+ action_space = self.action_mapper.get_action_space_update()
+ action_space = DictType(**action_space)
+
+ self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)
+
+ if policy_kwargs is None:
+ policy_kwargs = POLICY_KWARGS
+ if pi_head_kwargs is None:
+ pi_head_kwargs = PI_HEAD_KWARGS
+
+ agent_kwargs = dict(policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)
+
+ self.policy = MinecraftAgentPolicy(**agent_kwargs).to(device)
+ self.hidden_state = self.policy.initial_state(1)
+ self._dummy_first = th.from_numpy(np.array((False,))).to(device)
+
+ def load_weights(self, path):
+ """Load model weights from a path, and reset hidden state"""
+ self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)
+ self.reset()
+
+ def reset(self):
+ """Reset agent to initial state (i.e., reset hidden state)"""
+ self.hidden_state = self.policy.initial_state(1)
+
+ def _env_obs_to_agent(self, minerl_obs):
+ """
+ Turn observation from MineRL environment into model's observation
+
+ Returns torch tensors.
+ """
+ agent_input = resize_image(minerl_obs["pov"], AGENT_RESOLUTION)[None]
+ agent_input = {"img": th.from_numpy(agent_input).to(self.device)}
+ return agent_input
+
+ def _agent_action_to_env(self, agent_action):
+ """Turn output from policy into action for MineRL"""
+ # This is quite an important step (for some reason).
+ # For the sake of your sanity, remember to do this step (manual conversion to numpy)
+ # before proceeding. Otherwise, your agent might be a little derp.
+ action = agent_action
+ if isinstance(action["buttons"], th.Tensor):
+ action = {
+ "buttons": agent_action["buttons"].cpu().numpy(),
+ "camera": agent_action["camera"].cpu().numpy()
+ }
+ minerl_action = self.action_mapper.to_factored(action)
+ minerl_action_transformed = self.action_transformer.policy2env(minerl_action)
+ return minerl_action_transformed
+
+ def _env_action_to_agent(self, minerl_action_transformed, to_torch=False, check_if_null=False):
+ """
+ Turn action from MineRL to model's action.
+
+ Note that this will add batch dimensions to the action.
+ Returns numpy arrays, unless `to_torch` is True, in which case it returns torch tensors.
+
+ If `check_if_null` is True, check if the action is null (no action) after the initial
+ transformation. This matches the behaviour done in OpenAI's VPT work.
+ If the action is null, return None instead.
+ """
+ minerl_action = self.action_transformer.env2policy(minerl_action_transformed)
+ if check_if_null:
+ if np.all(minerl_action["buttons"] == 0) and np.all(minerl_action["camera"] == self.action_transformer.camera_zero_bin):
+ return None
+
+ # Add batch dims if not present
+ if minerl_action["camera"].ndim == 1:
+ minerl_action = {k: v[None] for k, v in minerl_action.items()}
+ action = self.action_mapper.from_factored(minerl_action)
+ if to_torch:
+ action = {k: th.from_numpy(v).to(self.device) for k, v in action.items()}
+ return action
+
+ def get_action(self, minerl_obs):
+ """
+ Get agent's action for given MineRL observation.
+
+ Agent's hidden state is tracked internally. To reset it,
+ call `reset()`.
+ """
+ agent_input = self._env_obs_to_agent(minerl_obs)
+ # The "first" argument could be used to reset tell episode
+ # boundaries, but we are only using this for predicting (for now),
+ # so we do not hassle with it yet.
+ agent_action, self.hidden_state, _ = self.policy.act(
+ agent_input, self._dummy_first, self.hidden_state,
+ stochastic=True
+ )
+ minerl_action = self._agent_action_to_env(agent_action)
+ return minerl_action
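+
+
+# Minimal usage sketch (not part of the original agent code; the filenames below are
+# placeholders for a .model/.weights pair downloaded separately, and parsing the
+# .model pickle this way mirrors load_model_parameters in behavioural_cloning.py):
+#
+#   import pickle
+#   import gym
+#   import minerl  # noqa: F401  (registers the MineRL envs)
+#
+#   agent_parameters = pickle.load(open("foundation-model-1x.model", "rb"))
+#   policy_kwargs = agent_parameters["model"]["args"]["net"]["args"]
+#   pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
+#
+#   env = gym.make("MineRLBasaltFindCave-v0")
+#   agent = MineRLAgent(env, policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs)
+#   agent.load_weights("foundation-model-1x.weights")
+#
+#   obs = env.reset()
+#   done = False
+#   while not done:
+#       minerl_action = agent.get_action(obs)
+#       obs, reward, done, info = env.step(minerl_action)
+#   env.close()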
diff --git a/docs/src/behavioural_cloning.py b/docs/src/behavioural_cloning.py
new file mode 100644
index 0000000..07ea827
--- /dev/null
+++ b/docs/src/behavioural_cloning.py
@@ -0,0 +1,143 @@
+# Basic behavioural cloning
+# Note: this uses gradient accumulation in batches of one
+# to perform training.
+# This will fit even on smaller GPUs (tested on an 8GB one),
+# but is slow.
+# NOTE: This is _not_ the original code used for VPT!
+# This is merely to illustrate how to fine-tune the models and includes
+# the processing steps used.
+
+# This will likely be much worse than what original VPT did:
+# we are not training on full sequences, but only one step at a time to save VRAM.
+
+from argparse import ArgumentParser
+import pickle
+import time
+
+import gym
+import minerl
+import torch as th
+import numpy as np
+
+from agent import PI_HEAD_KWARGS, MineRLAgent
+from data_loader import DataLoader
+from lib.tree_util import tree_map
+
+EPOCHS = 2
+# Needs to be <= number of videos
+BATCH_SIZE = 8
+# Ideally more than batch size to create
+# variation in datasets (otherwise, you will
+# get a bunch of consecutive samples)
+# Decrease this (and batch_size) if you run out of memory
+N_WORKERS = 12
+DEVICE = "cuda"
+
+LOSS_REPORT_RATE = 100
+
+LEARNING_RATE = 0.000181
+WEIGHT_DECAY = 0.039428
+MAX_GRAD_NORM = 5.0
+
+def load_model_parameters(path_to_model_file):
+ agent_parameters = pickle.load(open(path_to_model_file, "rb"))
+ policy_kwargs = agent_parameters["model"]["args"]["net"]["args"]
+ pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
+ pi_head_kwargs["temperature"] = float(pi_head_kwargs["temperature"])
+ return policy_kwargs, pi_head_kwargs
+
+def behavioural_cloning_train(data_dir, in_model, in_weights, out_weights):
+ agent_policy_kwargs, agent_pi_head_kwargs = load_model_parameters(in_model)
+
+ # An environment is needed to create the model with the right settings.
+ # All basalt environments have the same settings, so any of them works here
+ env = gym.make("MineRLBasaltFindCave-v0")
+ agent = MineRLAgent(env, device=DEVICE, policy_kwargs=agent_policy_kwargs, pi_head_kwargs=agent_pi_head_kwargs)
+ agent.load_weights(in_weights)
+ env.close()
+
+ policy = agent.policy
+ trainable_parameters = policy.parameters()
+
+ # Parameters taken from the OpenAI VPT paper
+ optimizer = th.optim.Adam(
+ trainable_parameters,
+ lr=LEARNING_RATE,
+ weight_decay=WEIGHT_DECAY
+ )
+
+ data_loader = DataLoader(
+ dataset_dir=data_dir,
+ n_workers=N_WORKERS,
+ batch_size=BATCH_SIZE,
+ n_epochs=EPOCHS
+ )
+
+ start_time = time.time()
+
+ # Keep track of the hidden state per episode/trajectory.
+ # DataLoader provides a unique id for each episode, which will
+ # be different even for the same trajectory when it is loaded
+ # up again.
+ episode_hidden_states = {}
+ dummy_first = th.from_numpy(np.array((False,))).to(DEVICE)
+
+ loss_sum = 0
+ for batch_i, (batch_images, batch_actions, batch_episode_id) in enumerate(data_loader):
+ batch_loss = 0
+ for image, action, episode_id in zip(batch_images, batch_actions, batch_episode_id):
+ agent_action = agent._env_action_to_agent(action, to_torch=True, check_if_null=True)
+ if agent_action is None:
+ # Action was null
+ continue
+
+ agent_obs = agent._env_obs_to_agent({"pov": image})
+ if episode_id not in episode_hidden_states:
+ # TODO need to clean up this hidden state after worker is done with the work item.
+ # Leaks memory, but not tooooo much at these scales (will be a problem later).
+ episode_hidden_states[episode_id] = policy.initial_state(1)
+ agent_state = episode_hidden_states[episode_id]
+
+ pi_distribution, v_prediction, new_agent_state = policy.get_output_for_observation(
+ agent_obs,
+ agent_state,
+ dummy_first
+ )
+
+ log_prob = policy.get_logprob_of_action(pi_distribution, agent_action)
+
+ # Make sure we do not try to backprop through sequence
+ # (fails with current accumulation)
+ new_agent_state = tree_map(lambda x: x.detach(), new_agent_state)
+ episode_hidden_states[episode_id] = new_agent_state
+
+ # Finally, update the agent to increase the probability of the
+ # taken action.
+ # Remember to take mean over batch losses
+ loss = -log_prob / BATCH_SIZE
+ batch_loss += loss.item()
+ loss.backward()
+
+ th.nn.utils.clip_grad_norm_(trainable_parameters, MAX_GRAD_NORM)
+ optimizer.step()
+ optimizer.zero_grad()
+
+ loss_sum += batch_loss
+ if batch_i % LOSS_REPORT_RATE == 0:
+ time_since_start = time.time() - start_time
+ print(f"Time: {time_since_start:.2f}, Batches: {batch_i}, Avrg loss: {loss_sum / LOSS_REPORT_RATE:.4f}")
+ loss_sum = 0
+
+ state_dict = policy.state_dict()
+ th.save(state_dict, out_weights)
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser()
+ parser.add_argument("--data-dir", type=str, required=True, help="Path to the directory containing recordings to be trained on")
+ parser.add_argument("--in-model", required=True, type=str, help="Path to the .model file to be finetuned")
+ parser.add_argument("--in-weights", required=True, type=str, help="Path to the .weights file to be finetuned")
+ parser.add_argument("--out-weights", required=True, type=str, help="Path where finetuned weights will be saved")
+
+ args = parser.parse_args()
+ behavioural_cloning_train(args.data_dir, args.in_model, args.in_weights, args.out_weights)
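+
+# Example invocation (a sketch; the data directory and the .model/.weights filenames
+# are placeholders for files you download or record separately, not files shipped here):
+#
+#   python behavioural_cloning.py \
+#       --data-dir data/MineRLBasaltFindCave-v0 \
+#       --in-model foundation-model-1x.model \
+#       --in-weights foundation-model-1x.weights \
+#       --out-weights finetuned-1x.weights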
diff --git a/docs/src/data_loader.py b/docs/src/data_loader.py
new file mode 100644
index 0000000..b8d14d5
--- /dev/null
+++ b/docs/src/data_loader.py
@@ -0,0 +1,222 @@
+# Code for loading OpenAI MineRL VPT datasets
+# NOTE: This is NOT original code used for the VPT experiments!
+# (But contains all [or at least most] steps done in the original data loading)
+
+import json
+import glob
+import os
+import random
+from multiprocessing import Process, Queue, Event
+
+import numpy as np
+import cv2
+
+from run_inverse_dynamics_model import json_action_to_env_action
+from agent import resize_image, AGENT_RESOLUTION
+
+QUEUE_TIMEOUT = 10
+
+CURSOR_FILE = os.path.join(os.path.dirname(__file__), "cursors", "mouse_cursor_white_16x16.png")
+
+MINEREC_ORIGINAL_HEIGHT_PX = 720
+
+# If the GUI is open, mouse dx/dy also need to be adjusted with these scalers.
+# If data version is not present, assume it is 1.
+MINEREC_VERSION_SPECIFIC_SCALERS = {
+ "5.7": 0.5,
+ "5.8": 0.5,
+ "6.7": 2.0,
+ "6.8": 2.0,
+ "6.9": 2.0,
+}
+
+
+def composite_images_with_alpha(image1, image2, alpha, x, y):
+ """
+ Draw image2 over image1 at location x,y, using alpha as the opacity for image2.
+
+ Modifies image1 in-place
+ """
+ ch = max(0, min(image1.shape[0] - y, image2.shape[0]))
+ cw = max(0, min(image1.shape[1] - x, image2.shape[1]))
+ if ch == 0 or cw == 0:
+ return
+ alpha = alpha[:ch, :cw]
+ image1[y:y + ch, x:x + cw, :] = (image1[y:y + ch, x:x + cw, :] * (1 - alpha) + image2[:ch, :cw, :] * alpha).astype(np.uint8)
+
+
+def data_loader_worker(tasks_queue, output_queue, quit_workers_event):
+ """
+ Worker for the data loader.
+ """
+ cursor_image = cv2.imread(CURSOR_FILE, cv2.IMREAD_UNCHANGED)
+ # Assume 16x16
+ cursor_image = cursor_image[:16, :16, :]
+ cursor_alpha = cursor_image[:, :, 3:] / 255.0
+ cursor_image = cursor_image[:, :, :3]
+
+ while True:
+ task = tasks_queue.get()
+ if task is None:
+ break
+ trajectory_id, video_path, json_path = task
+ video = cv2.VideoCapture(video_path)
+ # NOTE: In some recordings, the game seems to start
+ # with the attack button stuck down from the beginning,
+ # and it stays stuck until the player actually presses attack.
+ # NOTE: It is uncertain if this was an issue with the original code.
+ attack_is_stuck = False
+ # The scrollwheel is an allowed way to change items, but it is
+ # not captured by the recorder.
+ # Work around this by keeping track of the selected hotbar item
+ # and updating "hotbar.#" actions when the hotbar selection changes.
+ # NOTE: It is uncertain if this was/is an issue with the contractor data.
+ last_hotbar = 0
+
+ with open(json_path) as json_file:
+ json_lines = json_file.readlines()
+ json_data = "[" + ",".join(json_lines) + "]"
+ json_data = json.loads(json_data)
+ for i in range(len(json_data)):
+ if quit_workers_event.is_set():
+ break
+ step_data = json_data[i]
+
+ if i == 0:
+ # Check if attack will be stuck down
+ if step_data["mouse"]["newButtons"] == [0]:
+ attack_is_stuck = True
+ elif attack_is_stuck:
+ # Check if we press attack down, then it might not be stuck
+ if 0 in step_data["mouse"]["newButtons"]:
+ attack_is_stuck = False
+ # If still stuck, remove the action
+ if attack_is_stuck:
+ step_data["mouse"]["buttons"] = [button for button in step_data["mouse"]["buttons"] if button != 0]
+
+ action, is_null_action = json_action_to_env_action(step_data)
+
+ # Update hotbar selection
+ current_hotbar = step_data["hotbar"]
+ if current_hotbar != last_hotbar:
+ action["hotbar.{}".format(current_hotbar + 1)] = 1
+ last_hotbar = current_hotbar
+
+ # Read the frame even if the action is null so we keep moving through the video
+ ret, frame = video.read()
+ if ret:
+ # Skip null actions as done in the VPT paper
+ # NOTE: in VPT paper, this was checked _after_ transforming into agent's action-space.
+ # We do the check here as well to reduce the amount of data sent over the queue.
+ if is_null_action:
+ continue
+ if step_data["isGuiOpen"]:
+ camera_scaling_factor = frame.shape[0] / MINEREC_ORIGINAL_HEIGHT_PX
+ cursor_x = int(step_data["mouse"]["x"] * camera_scaling_factor)
+ cursor_y = int(step_data["mouse"]["y"] * camera_scaling_factor)
+ composite_images_with_alpha(frame, cursor_image, cursor_alpha, cursor_x, cursor_y)
+ cv2.cvtColor(frame, code=cv2.COLOR_BGR2RGB, dst=frame)
+ frame = np.asarray(np.clip(frame, 0, 255), dtype=np.uint8)
+ frame = resize_image(frame, AGENT_RESOLUTION)
+ output_queue.put((trajectory_id, frame, action), timeout=QUEUE_TIMEOUT)
+ else:
+ print(f"Could not read frame from video {video_path}")
+ video.release()
+ if quit_workers_event.is_set():
+ break
+ # Signal that this worker has finished
+ output_queue.put(None)
+
+class DataLoader:
+ """
+ Generator class for loading batches from a dataset
+
+ This only returns a single step at a time per worker; no sub-sequences.
+ Idea is that you keep track of the model's hidden state and feed that in,
+ along with one sample at a time.
+
+ + Simpler loader code
+ + Supports lower end hardware
+ - Not very efficient (could be faster)
+ - No support for sub-sequences
+ - Loads up individual files as trajectory files (i.e. if a trajectory is split into multiple files,
+ this code will load each part as a separate item).
+ """
+ def __init__(self, dataset_dir, n_workers=8, batch_size=8, n_epochs=1, max_queue_size=16):
+ assert n_workers >= batch_size, "Number of workers must be equal or greater than batch size"
+ self.dataset_dir = dataset_dir
+ self.n_workers = n_workers
+ self.n_epochs = n_epochs
+ self.batch_size = batch_size
+ self.max_queue_size = max_queue_size
+ unique_ids = glob.glob(os.path.join(dataset_dir, "*.mp4"))
+ unique_ids = list(set([os.path.basename(x).split(".")[0] for x in unique_ids]))
+ self.unique_ids = unique_ids
+ # Create tuples of (video_path, json_path) for each unique_id
+ demonstration_tuples = []
+ for unique_id in unique_ids:
+ video_path = os.path.abspath(os.path.join(dataset_dir, unique_id + ".mp4"))
+ json_path = os.path.abspath(os.path.join(dataset_dir, unique_id + ".jsonl"))
+ demonstration_tuples.append((video_path, json_path))
+
+ assert n_workers <= len(demonstration_tuples), f"n_workers should be lower or equal than number of demonstrations {len(demonstration_tuples)}"
+
+ # Repeat dataset for n_epochs times, shuffling the order for
+ # each epoch
+ self.demonstration_tuples = []
+ for i in range(n_epochs):
+ random.shuffle(demonstration_tuples)
+ self.demonstration_tuples += demonstration_tuples
+
+ self.task_queue = Queue()
+ self.n_steps_processed = 0
+ for trajectory_id, task in enumerate(self.demonstration_tuples):
+ self.task_queue.put((trajectory_id, *task))
+ for _ in range(n_workers):
+ self.task_queue.put(None)
+
+ self.output_queues = [Queue(maxsize=max_queue_size) for _ in range(n_workers)]
+ self.quit_workers_event = Event()
+ self.processes = [
+ Process(
+ target=data_loader_worker,
+ args=(
+ self.task_queue,
+ output_queue,
+ self.quit_workers_event,
+ ),
+ daemon=True
+ )
+ for output_queue in self.output_queues
+ ]
+ for process in self.processes:
+ process.start()
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ batch_frames = []
+ batch_actions = []
+ batch_episode_id = []
+
+ for i in range(self.batch_size):
+ workitem = self.output_queues[self.n_steps_processed % self.n_workers].get(timeout=QUEUE_TIMEOUT)
+ if workitem is None:
+ # Stop iteration when the first worker runs out of work to do.
+ # Yes, this has a chance of cutting out a fair amount of data,
+ # but it ensures batches remain diverse, instead of degenerating
+ # towards the end where potentially a single worker would provide
+ # all samples in a batch.
+ raise StopIteration()
+ trajectory_id, frame, action = workitem
+ batch_frames.append(frame)
+ batch_actions.append(action)
+ batch_episode_id.append(trajectory_id)
+ self.n_steps_processed += 1
+ return batch_frames, batch_actions, batch_episode_id
+
+ def __del__(self):
+ for process in self.processes:
+ process.terminate()
+ process.join()
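+
+
+if __name__ == "__main__":
+    # Minimal smoke test (an illustrative sketch, not part of the original loader):
+    # the directory below is a placeholder and must contain matching <id>.mp4 / <id>.jsonl
+    # pairs, at least as many of them as n_workers.
+    loader = DataLoader(
+        dataset_dir="data/MineRLBasaltFindCave-v0",
+        n_workers=2,
+        batch_size=2,
+        n_epochs=1,
+    )
+    for batch_i, (frames, actions, episode_ids) in enumerate(loader):
+        # Each frame has already been resized to AGENT_RESOLUTION by the worker
+        print(batch_i, frames[0].shape, episode_ids)
+        if batch_i >= 3:
+            break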
diff --git a/docs/src/inverse_dynamics_model.py b/docs/src/inverse_dynamics_model.py
new file mode 100644
index 0000000..ac4e8fd
--- /dev/null
+++ b/docs/src/inverse_dynamics_model.py
@@ -0,0 +1,95 @@
+import numpy as np
+import torch as th
+import cv2
+from gym3.types import DictType
+from gym import spaces
+
+from lib.action_mapping import CameraHierarchicalMapping, IDMActionMapping
+from lib.actions import ActionTransformer
+from lib.policy import InverseActionPolicy
+from lib.torch_util import default_device_type, set_default_torch_device
+from agent import resize_image, AGENT_RESOLUTION
+
+
+ACTION_TRANSFORMER_KWARGS = dict(
+ camera_binsize=2,
+ camera_maxval=10,
+ camera_mu=10,
+ camera_quantization_scheme="mu_law",
+)
+
+class IDMAgent:
+ """
+ Sugarcoating on the inverse dynamics model (IDM) used to predict actions Minecraft players take in videos.
+
+ Functionally same as MineRLAgent.
+ """
+ def __init__(self, idm_net_kwargs, pi_head_kwargs, device=None):
+ if device is None:
+ device = default_device_type()
+ self.device = th.device(device)
+ # Set the default torch device for underlying code as well
+ set_default_torch_device(self.device)
+ self.action_mapper = IDMActionMapping(n_camera_bins=11)
+ action_space = self.action_mapper.get_action_space_update()
+ action_space = DictType(**action_space)
+
+ self.action_transformer = ActionTransformer(**ACTION_TRANSFORMER_KWARGS)
+
+ idm_policy_kwargs = dict(idm_net_kwargs=idm_net_kwargs, pi_head_kwargs=pi_head_kwargs, action_space=action_space)
+
+ self.policy = InverseActionPolicy(**idm_policy_kwargs).to(device)
+ self.hidden_state = self.policy.initial_state(1)
+ self._dummy_first = th.from_numpy(np.array((False,))).to(device)
+
+ def load_weights(self, path):
+ """Load model weights from a path, and reset hidden state"""
+ self.policy.load_state_dict(th.load(path, map_location=self.device), strict=False)
+ self.reset()
+
+ def reset(self):
+ """Reset agent to initial state (i.e., reset hidden state)"""
+ self.hidden_state = self.policy.initial_state(1)
+
+ def _video_obs_to_agent(self, video_frames):
+ imgs = [resize_image(frame, AGENT_RESOLUTION) for frame in video_frames]
+ # Add time and batch dim
+ imgs = np.stack(imgs)[None]
+ agent_input = {"img": th.from_numpy(imgs).to(self.device)}
+ return agent_input
+
+ def _agent_action_to_env(self, agent_action):
+ """Turn output from policy into action for MineRL"""
+ # This is quite an important step (for some reason).
+ # For the sake of your sanity, remember to do this step (manual conversion to numpy)
+ # before proceeding. Otherwise, your agent might be a little derp.
+ action = {
+ "buttons": agent_action["buttons"].cpu().numpy(),
+ "camera": agent_action["camera"].cpu().numpy()
+ }
+ minerl_action = self.action_mapper.to_factored(action)
+ minerl_action_transformed = self.action_transformer.policy2env(minerl_action)
+ return minerl_action_transformed
+
+ def predict_actions(self, video_frames):
+ """
+ Predict actions for a sequence of frames.
+
+ `video_frames` should be of shape (N, H, W, C).
+ Returns MineRL action dict, where each action head
+ has shape (N, ...).
+
+ Agent's hidden state is tracked internally. To reset it,
+ call `reset()`.
+ """
+ agent_input = self._video_obs_to_agent(video_frames)
+ # The "first" argument could be used to reset tell episode
+ # boundaries, but we are only using this for predicting (for now),
+ # so we do not hassle with it yet.
+ dummy_first = th.zeros((video_frames.shape[0], 1)).to(self.device)
+ predicted_actions, self.hidden_state, _ = self.policy.predict(
+ agent_input, first=dummy_first, state_in=self.hidden_state,
+ deterministic=True
+ )
+ predicted_minerl_action = self._agent_action_to_env(predicted_actions)
+ return predicted_minerl_action
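+
+
+# Minimal usage sketch (not part of the original file; the IDM .model/.weights filenames
+# are placeholders, and reading the pickle this way assumes the IDM model file follows
+# the same layout that load_model_parameters in behavioural_cloning.py relies on):
+#
+#   import pickle
+#   agent_parameters = pickle.load(open("4x_idm.model", "rb"))
+#   net_kwargs = agent_parameters["model"]["args"]["net"]["args"]
+#   pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
+#   agent = IDMAgent(idm_net_kwargs=net_kwargs, pi_head_kwargs=pi_head_kwargs)
+#   agent.load_weights("4x_idm.weights")
+#
+#   # frames: np.ndarray of shape (N, H, W, C) in RGB, e.g. a chunk of video frames
+#   predicted_actions = agent.predict_actions(frames)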
diff --git a/docs/src/lib/action_head.py b/docs/src/lib/action_head.py
new file mode 100644
index 0000000..fc11eec
--- /dev/null
+++ b/docs/src/lib/action_head.py
@@ -0,0 +1,275 @@
+import logging
+from typing import Any, Tuple
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.init as init
+from gym3.types import DictType, Discrete, Real, TensorType, ValType
+
+LOG0 = -100
+
+
+def fan_in_linear(module: nn.Module, scale=1.0, bias=True):
+ """Fan-in init"""
+ module.weight.data *= scale / module.weight.norm(dim=1, p=2, keepdim=True)
+
+ if bias:
+ module.bias.data *= 0
+
+
+class ActionHead(nn.Module):
+ """Abstract base class for action heads compatible with forc"""
+
+ def forward(self, input_data: torch.Tensor) -> Any:
+ """
+ Just a forward pass through this head
+ :returns pd_params - parameters describing the probability distribution
+ """
+ raise NotImplementedError
+
+ def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:
+ """Logartithm of probability of sampling `action_sample` from a probability described by `pd_params`"""
+ raise NotImplementedError
+
+ def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:
+ """Entropy of this distribution"""
+ raise NotImplementedError
+
+ def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> Any:
+ """
+ Draw a sample from probability distribution given by those params
+
+ :param pd_params Parameters of a probability distribution
+ :param deterministic Whether to return a stochastic sample or deterministic mode of a distribution
+ """
+ raise NotImplementedError
+
+ def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:
+ """KL divergence between two distribution described by these two params"""
+ raise NotImplementedError
+
+
+class DiagGaussianActionHead(ActionHead):
+ """
+ Action head where actions are normally distributed uncorrelated variables with specific means and variances.
+
+ Means are calculated directly from the network while standard deviations are a parameter of this module
+ """
+
+ LOG2PI = np.log(2.0 * np.pi)
+
+ def __init__(self, input_dim: int, num_dimensions: int):
+ super().__init__()
+
+ self.input_dim = input_dim
+ self.num_dimensions = num_dimensions
+
+ self.linear_layer = nn.Linear(input_dim, num_dimensions)
+ self.log_std = nn.Parameter(torch.zeros(num_dimensions), requires_grad=True)
+
+ def reset_parameters(self):
+ init.orthogonal_(self.linear_layer.weight, gain=0.01)
+ init.constant_(self.linear_layer.bias, 0.0)
+
+ def forward(self, input_data: torch.Tensor, mask=None) -> torch.Tensor:
+ assert not mask, "Can not use a mask in a gaussian action head"
+ means = self.linear_layer(input_data)
+ # Unsqueeze many times to get to the same shape
+ logstd = self.log_std[(None,) * (len(means.shape) - 1)]
+
+ mean_view, logstd = torch.broadcast_tensors(means, logstd)
+
+ return torch.stack([mean_view, logstd], dim=-1)
+
+ def logprob(self, action_sample: torch.Tensor, pd_params: torch.Tensor) -> torch.Tensor:
+ """Log-likelihood"""
+ means = pd_params[..., 0]
+ log_std = pd_params[..., 1]
+
+ std = torch.exp(log_std)
+
+ z_score = (action_sample - means) / std
+
+ return -(0.5 * ((z_score ** 2 + self.LOG2PI).sum(dim=-1)) + log_std.sum(dim=-1))
+
+ def entropy(self, pd_params: torch.Tensor) -> torch.Tensor:
+ """
+ Categorical distribution entropy calculation - sum probs * log(probs).
+ In case of diagonal gaussian distribution - 1/2 log(2 pi e sigma^2)
+ """
+ log_std = pd_params[..., 1]
+ return (log_std + 0.5 * (self.LOG2PI + 1)).sum(dim=-1)
+
+ def sample(self, pd_params: torch.Tensor, deterministic: bool = False) -> torch.Tensor:
+ means = pd_params[..., 0]
+ log_std = pd_params[..., 1]
+
+ if deterministic:
+ return means
+ else:
+ return torch.randn_like(means) * torch.exp(log_std) + means
+
+ def kl_divergence(self, params_q: torch.Tensor, params_p: torch.Tensor) -> torch.Tensor:
+ """
+ Categorical distribution KL divergence calculation
+ KL(Q || P) = sum Q_i log (Q_i / P_i)
+
+ Formula is:
+ log(sigma_p) - log(sigma_q) + (sigma_q^2 + (mu_q - mu_p)^2))/(2 * sigma_p^2)
+ """
+ means_q = params_q[..., 0]
+ log_std_q = params_q[..., 1]
+
+ means_p = params_p[..., 0]
+ log_std_p = params_p[..., 1]
+
+ std_q = torch.exp(log_std_q)
+ std_p = torch.exp(log_std_p)
+
+ kl_div = log_std_p - log_std_q + (std_q ** 2 + (means_q - means_p) ** 2) / (2.0 * std_p ** 2) - 0.5
+
+ return kl_div.sum(dim=-1, keepdim=True)
+
+
+class CategoricalActionHead(ActionHead):
+ """Action head with categorical actions"""
+
+ def __init__(
+ self, input_dim: int, shape: Tuple[int], num_actions: int, builtin_linear_layer: bool = True, temperature: float = 1.0
+ ):
+ super().__init__()
+
+ self.input_dim = input_dim
+ self.num_actions = num_actions
+ self.output_shape = shape + (num_actions,)
+ self.temperature = temperature
+
+ if builtin_linear_layer:
+ self.linear_layer = nn.Linear(input_dim, np.prod(self.output_shape))
+ else:
+ assert (
+ input_dim == num_actions
+ ), f"If input_dim ({input_dim}) != num_actions ({num_actions}), you need a linear layer to convert them."
+ self.linear_layer = None
+
+ def reset_parameters(self):
+ if self.linear_layer is not None:
+ init.orthogonal_(self.linear_layer.weight, gain=0.01)
+ init.constant_(self.linear_layer.bias, 0.0)
+ fan_in_linear(self.linear_layer, scale=0.01)
+
+ def forward(self, input_data: torch.Tensor, mask=None) -> Any:
+ if self.linear_layer is not None:
+ flat_out = self.linear_layer(input_data)
+ else:
+ flat_out = input_data
+ shaped_out = flat_out.reshape(flat_out.shape[:-1] + self.output_shape)
+ shaped_out /= self.temperature
+ if mask is not None:
+ shaped_out[~mask] = LOG0
+
+ # Convert to float32 to avoid RuntimeError: "log_softmax_lastdim_kernel_impl" not implemented for 'Half'
+ return F.log_softmax(shaped_out.float(), dim=-1)
+
+ def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:
+ value = actions.long().unsqueeze(-1)
+ value, log_pmf = torch.broadcast_tensors(value, logits)
+ value = value[..., :1]
+ result = log_pmf.gather(-1, value).squeeze(-1)
+ # result is per-entry, still of size self.output_shape[:-1]; we need to reduce over the rest of it.
+ for _ in self.output_shape[:-1]:
+ result = result.sum(dim=-1)
+ return result
+
+ def entropy(self, logits: torch.Tensor) -> torch.Tensor:
+ """Categorical distribution entropy calculation - sum probs * log(probs)"""
+ probs = torch.exp(logits)
+ entropy = -torch.sum(probs * logits, dim=-1)
+ # entropy is per-entry, still of size self.output_shape[:-1]; we need to reduce over the rest of it.
+ for _ in self.output_shape[:-1]:
+ entropy = entropy.sum(dim=-1)
+ return entropy
+
+ def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:
+ if deterministic:
+ return torch.argmax(logits, dim=-1)
+ else:
+ # Gumbel-Softmax trick.
+ u = torch.rand_like(logits)
+ # In float16, if you have around 2^{float_mantissa_bits} logits, sometimes you'll sample 1.0
+ # Then the log(-log(1.0)) will give -inf when it should give +inf
+ # This is a silly hack to get around that.
+ # This hack does not skew the probability distribution, because this event can't possibly win the argmax.
+ u[u == 1.0] = 0.999
+
+ return torch.argmax(logits - torch.log(-torch.log(u)), dim=-1)
+
+ def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:
+ """
+ Categorical distribution KL divergence calculation
+ KL(Q || P) = sum Q_i log (Q_i / P_i)
+ When talking about logits this is:
+ sum exp(Q_i) * (Q_i - P_i)
+ """
+ kl = (torch.exp(logits_q) * (logits_q - logits_p)).sum(-1, keepdim=True)
+ # kl is per-entry, still of size self.output_shape; we need to reduce over the rest of it.
+ for _ in self.output_shape[:-1]:
+ kl = kl.sum(dim=-2) # dim=-2 because we use keepdim=True above.
+ return kl
+
+
+class DictActionHead(nn.ModuleDict):
+ """Action head with multiple sub-actions"""
+
+ def reset_parameters(self):
+ for subhead in self.values():
+ subhead.reset_parameters()
+
+ def forward(self, input_data: torch.Tensor, **kwargs) -> Any:
+ """
+ :param kwargs: each kwarg should be a dict with keys corresponding to self.keys()
+ e.g. if this ModuleDict has submodules keyed by 'A', 'B', and 'C', we could call:
+ forward(input_data, foo={'A': True, 'C': False}, bar={'A': 7})
+ Then children will be called with:
+ A: forward(input_data, foo=True, bar=7)
+ B: forward(input_data)
+ C: forward(input_data, foo=False)
+ """
+ result = {}
+ for head_name, subhead in self.items():
+ head_kwargs = {
+ kwarg_name: kwarg[head_name]
+ for kwarg_name, kwarg in kwargs.items()
+ if kwarg is not None and head_name in kwarg
+ }
+ result[head_name] = subhead(input_data, **head_kwargs)
+ return result
+
+ def logprob(self, actions: torch.Tensor, logits: torch.Tensor) -> torch.Tensor:
+ return sum(subhead.logprob(actions[k], logits[k]) for k, subhead in self.items())
+
+ def sample(self, logits: torch.Tensor, deterministic: bool = False) -> Any:
+ return {k: subhead.sample(logits[k], deterministic) for k, subhead in self.items()}
+
+ def entropy(self, logits: torch.Tensor) -> torch.Tensor:
+ return sum(subhead.entropy(logits[k]) for k, subhead in self.items())
+
+ def kl_divergence(self, logits_q: torch.Tensor, logits_p: torch.Tensor) -> torch.Tensor:
+ return sum(subhead.kl_divergence(logits_q[k], logits_p[k]) for k, subhead in self.items())
+
+
+def make_action_head(ac_space: ValType, pi_out_size: int, temperature: float = 1.0):
+ """Helper function to create an action head corresponding to the environment action space"""
+ if isinstance(ac_space, TensorType):
+ if isinstance(ac_space.eltype, Discrete):
+ return CategoricalActionHead(pi_out_size, ac_space.shape, ac_space.eltype.n, temperature=temperature)
+ elif isinstance(ac_space.eltype, Real):
+ if temperature != 1.0:
+ logging.warning("Non-1 temperature not implemented for DiagGaussianActionHead.")
+ assert len(ac_space.shape) == 1, "Nontrivial shapes not yet implemented."
+ return DiagGaussianActionHead(pi_out_size, ac_space.shape[0])
+ elif isinstance(ac_space, DictType):
+ return DictActionHead({k: make_action_head(v, pi_out_size, temperature) for k, v in ac_space.items()})
+ raise NotImplementedError(f"Action space of type {type(ac_space)} is not supported")
diff --git a/docs/src/lib/action_mapping.py b/docs/src/lib/action_mapping.py
new file mode 100644
index 0000000..f6eb5c5
--- /dev/null
+++ b/docs/src/lib/action_mapping.py
@@ -0,0 +1,235 @@
+import abc
+import itertools
+from collections import OrderedDict
+from typing import Dict, List
+
+import numpy as np
+from gym3.types import DictType, Discrete, TensorType
+
+from lib.actions import Buttons
+
+
+class ActionMapping(abc.ABC):
+ """Class that maps between the standard MC factored action space and a new one you define!
+
+ :param n_camera_bins: Need to specify this to define the original ac space for stats code
+ """
+
+ # These are the default button groups; they can be changed for your action space
+ BUTTONS_GROUPS = OrderedDict(
+ hotbar=["none"] + [f"hotbar.{i}" for i in range(1, 10)],
+ fore_back=["none", "forward", "back"],
+ left_right=["none", "left", "right"],
+ sprint_sneak=["none", "sprint", "sneak"],
+ use=["none", "use"],
+ drop=["none", "drop"],
+ attack=["none", "attack"],
+ jump=["none", "jump"],
+ )
+
+ def __init__(self, n_camera_bins: int = 11):
+ assert n_camera_bins % 2 == 1, "n_camera_bins should be odd"
+ self.n_camera_bins = n_camera_bins
+ self.camera_null_bin = n_camera_bins // 2
+ self.stats_ac_space = DictType(
+ **{
+ "buttons": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),
+ "camera": TensorType(shape=(2,), eltype=Discrete(n_camera_bins)),
+ }
+ )
+
+ @abc.abstractmethod
+ def from_factored(self, ac: Dict) -> Dict:
+ """Converts a factored action (ac) to the new space
+
+ :param ac: Dictionary of actions that must have a batch dimension
+ """
+ pass
+
+ @abc.abstractmethod
+ def to_factored(self, ac: Dict) -> Dict:
+ """Converts an action in the new space (ac) to the factored action space.
+
+ :param ac: Dictionary of actions that must have a batch dimension
+ """
+ pass
+
+ @abc.abstractmethod
+ def get_action_space_update(self):
+ """Return a magym (gym3) action space. This will be used to update the env action space."""
+ pass
+
+ @abc.abstractmethod
+ def get_zero_action(self):
+ """Return the zero or null action for this action space"""
+ pass
+
+ def factored_buttons_to_groups(self, ac_buttons: np.ndarray, button_group: List[str]) -> List[str]:
+ """For a mutually exclusive group of buttons in button_group, find which option
+ in the group was chosen. Assumes that each button group has the option of 'none'
+ meaning that no button in the group was pressed.
+
+ :param ac_buttons: button actions from the factored action space. Should have dims [B, len(Buttons.ALL)]
+ :param button_group: List of buttons in a mutually exclusive group. Each item in the
+ list should appear in Buttons.ALL except for the special case 'none' which means
+ no button in the group was pressed. e.g. ['none', 'forward', 'back']. For now
+ 'none' must be the first element of button_group
+
+ Returns a list of length B, where each element is an item from button_group.
+ """
+ assert ac_buttons.shape[1] == len(
+ Buttons.ALL
+ ), f"There should be {len(Buttons.ALL)} buttons in the factored buttons space"
+ assert button_group[0] == "none", "This function only works if 'none' is in button_group"
+ # Actions in ac_buttons with order according to button_group
+ group_indices = [Buttons.ALL.index(b) for b in button_group if b != "none"]
+ ac_choices = ac_buttons[:, group_indices]
+
+ # Special cases for forward/back, left/right where mutual press means do neither
+ if "forward" in button_group and "back" in button_group:
+ ac_choices[np.all(ac_choices, axis=-1)] = 0
+ if "left" in button_group and "right" in button_group:
+ ac_choices[np.all(ac_choices, axis=-1)] = 0
+ ac_non_zero = np.where(ac_choices)
+ ac_choice = ["none" for _ in range(ac_buttons.shape[0])]
+ # Iterate over the non-zero indices so that if two buttons in a group were pressed at the same time
+ # we give priority to the button later in the group. E.g. if hotbar.1 and hotbar.2 are pressed during the same
+ # timestep, hotbar.2 is marked as pressed
+ for index, action in zip(ac_non_zero[0], ac_non_zero[1]):
+ ac_choice[index] = button_group[action + 1] # the zero'th index will mean no button pressed
+ return ac_choice
+
+class IDMActionMapping(ActionMapping):
+ """For IDM, but essentially this is just an identity mapping"""
+ def from_factored(self, ac: Dict) -> Dict:
+ return ac
+
+ def to_factored(self, ac: Dict) -> Dict:
+ return ac
+
+ def get_action_space_update(self):
+ """Return a magym (gym3) action space. This will be used to update the env action space."""
+ return {
+ "buttons": TensorType(shape=(len(Buttons.ALL),), eltype=Discrete(2)),
+ "camera": TensorType(shape=(2,), eltype=Discrete(self.n_camera_bins)),
+ }
+
+ def get_zero_action(self):
+ raise NotImplementedError()
+
+class CameraHierarchicalMapping(ActionMapping):
+ """Buttons are joint as in ButtonsJointMapping, but now a camera on/off meta action is added into this joint space.
+ When this meta action is triggered, the separate camera head chooses a camera action which is also now a joint space.
+
+ :param n_camera_bins: number of camera bins in the factored space
+ """
+
+ # Add camera meta action to BUTTONS_GROUPS
+ BUTTONS_GROUPS = ActionMapping.BUTTONS_GROUPS.copy()
+ BUTTONS_GROUPS["camera"] = ["none", "camera"]
+ BUTTONS_COMBINATIONS = list(itertools.product(*BUTTONS_GROUPS.values())) + ["inventory"]
+ BUTTONS_COMBINATION_TO_IDX = {comb: i for i, comb in enumerate(BUTTONS_COMBINATIONS)}
+ BUTTONS_IDX_TO_COMBINATION = {i: comb for i, comb in enumerate(BUTTONS_COMBINATIONS)}
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.camera_groups = OrderedDict(
+ camera_x=[f"camera_x{i}" for i in range(self.n_camera_bins)],
+ camera_y=[f"camera_y{i}" for i in range(self.n_camera_bins)],
+ )
+ self.camera_combinations = list(itertools.product(*self.camera_groups.values()))
+ self.camera_combination_to_idx = {comb: i for i, comb in enumerate(self.camera_combinations)}
+ self.camera_idx_to_combination = {i: comb for i, comb in enumerate(self.camera_combinations)}
+ self.camera_null_idx = self.camera_combination_to_idx[
+ (f"camera_x{self.camera_null_bin}", f"camera_y{self.camera_null_bin}")
+ ]
+ self._null_action = {
+ "buttons": self.BUTTONS_COMBINATION_TO_IDX[tuple("none" for _ in range(len(self.BUTTONS_GROUPS)))]
+ }
+ self._precompute_to_factored()
+
+ def _precompute_to_factored(self):
+ """Precompute the joint action -> factored action matrix."""
+ button_dim = self.stats_ac_space["buttons"].size
+ self.BUTTON_IDX_TO_FACTORED = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION), button_dim), dtype=int)
+ self.BUTTON_IDX_TO_CAMERA_META_OFF = np.zeros((len(self.BUTTONS_IDX_TO_COMBINATION)), dtype=bool)
+ self.CAMERA_IDX_TO_FACTORED = np.zeros((len(self.camera_idx_to_combination), 2), dtype=int)
+
+ # Pre compute Buttons
+ for jnt_ac, button_comb in self.BUTTONS_IDX_TO_COMBINATION.items():
+ new_button_ac = np.zeros(len(Buttons.ALL), dtype="i")
+ if button_comb == "inventory":
+ new_button_ac[Buttons.ALL.index("inventory")] = 1
+ else:
+ for group_choice in button_comb[:-1]: # Last one is camera
+ if group_choice != "none":
+ new_button_ac[Buttons.ALL.index(group_choice)] = 1
+
+ if button_comb[-1] != "camera": # This means camera meta action is off
+ self.BUTTON_IDX_TO_CAMERA_META_OFF[jnt_ac] = True
+ self.BUTTON_IDX_TO_FACTORED[jnt_ac] = new_button_ac
+
+ # Pre compute camera
+ for jnt_ac, camera_comb in self.camera_idx_to_combination.items():
+ new_camera_ac = np.ones((2), dtype="i") * self.camera_null_bin
+ new_camera_ac[0] = self.camera_groups["camera_x"].index(camera_comb[0])
+ new_camera_ac[1] = self.camera_groups["camera_y"].index(camera_comb[1])
+ self.CAMERA_IDX_TO_FACTORED[jnt_ac] = new_camera_ac
+
+ def from_factored(self, ac: Dict) -> Dict:
+ """Converts a factored action (ac) to the new space. Assumes ac has a batch dim"""
+ assert ac["camera"].ndim == 2, f"bad camera label, {ac['camera']}"
+ assert ac["buttons"].ndim == 2, f"bad buttons label, {ac['buttons']}"
+ # Get button choices for everything but camera
+ choices_by_group = OrderedDict(
+ (k, self.factored_buttons_to_groups(ac["buttons"], v)) for k, v in self.BUTTONS_GROUPS.items() if k != "camera"
+ )
+ # Set camera "on off" action based on whether non-null camera action was given
+ camera_is_null = np.all(ac["camera"] == self.camera_null_bin, axis=1)
+ choices_by_group["camera"] = ["none" if is_null else "camera" for is_null in camera_is_null]
+
+ new_button_ac = []
+ new_camera_ac = []
+ for i in range(ac["buttons"].shape[0]):
+ # Buttons
+ key = tuple([v[i] for v in choices_by_group.values()])
+ if ac["buttons"][i, Buttons.ALL.index("inventory")] == 1:
+ key = "inventory"
+ new_button_ac.append(self.BUTTONS_COMBINATION_TO_IDX[key])
+
+ # Camera -- inventory is also exclusive with camera
+ if key == "inventory":
+ key = (
+ f"camera_x{self.camera_null_bin}",
+ f"camera_y{self.camera_null_bin}",
+ )
+ else:
+ key = (f"camera_x{ac['camera'][i][0]}", f"camera_y{ac['camera'][i][1]}")
+ new_camera_ac.append(self.camera_combination_to_idx[key])
+
+ return dict(
+ buttons=np.array(new_button_ac)[:, None],
+ camera=np.array(new_camera_ac)[:, None],
+ )
+
+ def to_factored(self, ac: Dict) -> Dict:
+ """Converts an action in the new space (ac) to the factored action space. Assumes ac has a batch dim"""
+ assert ac["camera"].shape[-1] == 1
+ assert ac["buttons"].shape[-1] == 1
+
+ new_button_ac = self.BUTTON_IDX_TO_FACTORED[np.squeeze(ac["buttons"], -1)]
+ camera_off = self.BUTTON_IDX_TO_CAMERA_META_OFF[np.squeeze(ac["buttons"], -1)]
+ new_camera_ac = self.CAMERA_IDX_TO_FACTORED[np.squeeze(ac["camera"], -1)]
+ new_camera_ac[camera_off] = self.camera_null_bin
+
+ return dict(buttons=new_button_ac, camera=new_camera_ac)
+
+ def get_action_space_update(self):
+ return {
+ "camera": TensorType(shape=(1,), eltype=Discrete(len(self.camera_combinations))),
+ "buttons": TensorType(shape=(1,), eltype=Discrete(len(self.BUTTONS_COMBINATIONS))),
+ }
+
+ def get_zero_action(self):
+ return self._null_action
+
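+
+if __name__ == "__main__":
+    # Minimal sketch (illustrative only): take one factored action ("forward" pressed,
+    # camera at the null bin), map it into the joint space and back.
+    mapping = CameraHierarchicalMapping(n_camera_bins=11)
+    factored = {
+        "buttons": np.zeros((1, len(Buttons.ALL)), dtype=np.int64),
+        "camera": np.full((1, 2), mapping.camera_null_bin, dtype=np.int64),
+    }
+    factored["buttons"][0, Buttons.ALL.index("forward")] = 1
+    joint = mapping.from_factored(factored)
+    recovered = mapping.to_factored(joint)
+    assert recovered["buttons"][0, Buttons.ALL.index("forward")] == 1
+    print(joint, recovered["camera"])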
diff --git a/docs/src/lib/actions.py b/docs/src/lib/actions.py
new file mode 100644
index 0000000..c0a676e
--- /dev/null
+++ b/docs/src/lib/actions.py
@@ -0,0 +1,178 @@
+import attr
+import minerl.herobraine.hero.mc as mc
+import numpy as np
+
+from lib.minecraft_util import store_args
+
+
+class Buttons:
+ ATTACK = "attack"
+ BACK = "back"
+ FORWARD = "forward"
+ JUMP = "jump"
+ LEFT = "left"
+ RIGHT = "right"
+ SNEAK = "sneak"
+ SPRINT = "sprint"
+ USE = "use"
+ DROP = "drop"
+ INVENTORY = "inventory"
+
+ ALL = [
+ ATTACK,
+ BACK,
+ FORWARD,
+ JUMP,
+ LEFT,
+ RIGHT,
+ SNEAK,
+ SPRINT,
+ USE,
+ DROP,
+ INVENTORY,
+ ] + [f"hotbar.{i}" for i in range(1, 10)]
+
+
+class SyntheticButtons:
+ # Composite / scripted actions
+ CHANNEL_ATTACK = "channel-attack"
+
+ ALL = [CHANNEL_ATTACK]
+
+
+class QuantizationScheme:
+ LINEAR = "linear"
+ MU_LAW = "mu_law"
+
+
+@attr.s(auto_attribs=True)
+class CameraQuantizer:
+ """
+ A camera quantizer that discretizes and undiscretizes a continuous camera input with y (pitch) and x (yaw) components.
+
+ Parameters:
+ - camera_binsize: The size of the bins used for quantization. In case of mu-law quantization, it corresponds to the average binsize.
+ - camera_maxval: The maximum value of the camera action.
+ - quantization_scheme: The quantization scheme to use. Currently, two quantization schemes are supported:
+ - Linear quantization (default): Camera actions are split uniformly into discrete bins
+ - Mu-law quantization: Transforms the camera action using mu-law encoding (https://en.wikipedia.org/wiki/%CE%9C-law_algorithm)
+ followed by the same quantization scheme used by the linear scheme.
+ - mu: Mu is the parameter that defines the curvature of the mu-law encoding. Higher values of
+ mu will result in a sharper transition near zero. Below are some reference values listed
+ for choosing mu given a constant maxval and a desired max_precision value.
+ maxval = 10 | max_precision = 0.5 | μ ≈ 2.93826
+ maxval = 10 | max_precision = 0.4 | μ ≈ 4.80939
+ maxval = 10 | max_precision = 0.25 | μ ≈ 11.4887
+ maxval = 20 | max_precision = 0.5 | μ ≈ 2.7
+ maxval = 20 | max_precision = 0.4 | μ ≈ 4.39768
+ maxval = 20 | max_precision = 0.25 | μ ≈ 10.3194
+ maxval = 40 | max_precision = 0.5 | μ ≈ 2.60780
+ maxval = 40 | max_precision = 0.4 | μ ≈ 4.21554
+ maxval = 40 | max_precision = 0.25 | μ ≈ 9.81152
+ """
+
+ camera_maxval: int
+ camera_binsize: int
+ quantization_scheme: str = attr.ib(
+ default=QuantizationScheme.LINEAR,
+ validator=attr.validators.in_([QuantizationScheme.LINEAR, QuantizationScheme.MU_LAW]),
+ )
+ mu: float = attr.ib(default=5)
+
+ def discretize(self, xy):
+ xy = np.clip(xy, -self.camera_maxval, self.camera_maxval)
+
+ if self.quantization_scheme == QuantizationScheme.MU_LAW:
+ xy = xy / self.camera_maxval
+ v_encode = np.sign(xy) * (np.log(1.0 + self.mu * np.abs(xy)) / np.log(1.0 + self.mu))
+ v_encode *= self.camera_maxval
+ xy = v_encode
+
+ # Quantize using linear scheme
+ return np.round((xy + self.camera_maxval) / self.camera_binsize).astype(np.int64)
+
+ def undiscretize(self, xy):
+ xy = xy * self.camera_binsize - self.camera_maxval
+
+ if self.quantization_scheme == QuantizationScheme.MU_LAW:
+ xy = xy / self.camera_maxval
+ v_decode = np.sign(xy) * (1.0 / self.mu) * ((1.0 + self.mu) ** np.abs(xy) - 1.0)
+ v_decode *= self.camera_maxval
+ xy = v_decode
+ return xy
+
+
+class ActionTransformer:
+ """Transforms actions between internal array and minerl env format."""
+
+ @store_args
+ def __init__(
+ self,
+ camera_maxval=10,
+ camera_binsize=2,
+ camera_quantization_scheme="linear",
+ camera_mu=5,
+ ):
+ self.quantizer = CameraQuantizer(
+ camera_maxval=camera_maxval,
+ camera_binsize=camera_binsize,
+ quantization_scheme=camera_quantization_scheme,
+ mu=camera_mu,
+ )
+
+ def camera_zero_bin(self):
+ return self.camera_maxval // self.camera_binsize
+
+ def discretize_camera(self, xy):
+ return self.quantizer.discretize(xy)
+
+ def undiscretize_camera(self, pq):
+ return self.quantizer.undiscretize(pq)
+
+ def item_embed_id_to_name(self, item_id):
+ return mc.MINERL_ITEM_MAP[item_id]
+
+ def dict_to_numpy(self, acs):
+ """
+ Env format to policy output format.
+ """
+ act = {
+ "buttons": np.stack([acs.get(k, 0) for k in Buttons.ALL], axis=-1),
+ "camera": self.discretize_camera(acs["camera"]),
+ }
+ if not self.human_spaces:
+ act.update(
+ {
+ "synthetic_buttons": np.stack([acs[k] for k in SyntheticButtons.ALL], axis=-1),
+ "place": self.item_embed_name_to_id(acs["place"]),
+ "equip": self.item_embed_name_to_id(acs["equip"]),
+ "craft": self.item_embed_name_to_id(acs["craft"]),
+ }
+ )
+ return act
+
+ def numpy_to_dict(self, acs):
+ """
+ Numpy policy output to env-compatible format.
+ """
+ assert acs["buttons"].shape[-1] == len(
+ Buttons.ALL
+ ), f"Mismatched actions: {acs}; expected {len(Buttons.ALL)}:\n( {Buttons.ALL})"
+ out = {name: acs["buttons"][..., i] for (i, name) in enumerate(Buttons.ALL)}
+
+ out["camera"] = self.undiscretize_camera(acs["camera"])
+
+ return out
+
+ def policy2env(self, acs):
+ acs = self.numpy_to_dict(acs)
+ return acs
+
+ def env2policy(self, acs):
+ nbatch = acs["camera"].shape[0]
+ dummy = np.zeros((nbatch,))
+ out = {
+ "camera": self.discretize_camera(acs["camera"]),
+ "buttons": np.stack([acs.get(k, dummy) for k in Buttons.ALL], axis=-1),
+ }
+ return out
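+
+
+if __name__ == "__main__":
+    # Minimal sketch (illustrative only): with the mu-law settings used elsewhere in
+    # this repo (maxval=10, binsize=2, mu=10), a small camera motion survives a
+    # discretize/undiscretize round trip up to quantization error.
+    transformer = ActionTransformer(
+        camera_maxval=10,
+        camera_binsize=2,
+        camera_quantization_scheme="mu_law",
+        camera_mu=10,
+    )
+    env_action = {"camera": np.array([[-3.5, 7.0]]), "attack": np.array([1])}
+    policy_action = transformer.env2policy(env_action)
+    print(policy_action["camera"])   # integer bins in [0, 10]
+    recovered = transformer.policy2env(policy_action)
+    print(recovered["camera"])       # roughly [-3.5, 7.0] again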
diff --git a/docs/src/lib/impala_cnn.py b/docs/src/lib/impala_cnn.py
new file mode 100644
index 0000000..5cbb0ec
--- /dev/null
+++ b/docs/src/lib/impala_cnn.py
@@ -0,0 +1,195 @@
+import math
+from copy import deepcopy
+from typing import Dict, List, Optional
+
+from torch import nn
+from torch.nn import functional as F
+
+from lib import misc
+from lib import torch_util as tu
+from lib.util import FanInInitReLULayer
+
+
+class CnnBasicBlock(nn.Module):
+ """
+ Residual basic block, as in ImpalaCNN. Preserves channel number and shape
+ :param inchan: number of input channels
+ :param init_scale: weight init scale multiplier
+ """
+
+ def __init__(
+ self,
+ inchan: int,
+ init_scale: float = 1,
+ log_scope="",
+ init_norm_kwargs: Dict = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.inchan = inchan
+ s = math.sqrt(init_scale)
+ self.conv0 = FanInInitReLULayer(
+ self.inchan,
+ self.inchan,
+ kernel_size=3,
+ padding=1,
+ init_scale=s,
+ log_scope=f"{log_scope}/conv0",
+ **init_norm_kwargs,
+ )
+ self.conv1 = FanInInitReLULayer(
+ self.inchan,
+ self.inchan,
+ kernel_size=3,
+ padding=1,
+ init_scale=s,
+ log_scope=f"{log_scope}/conv1",
+ **init_norm_kwargs,
+ )
+
+ def forward(self, x):
+ x = x + self.conv1(self.conv0(x))
+ return x
+
+
+class CnnDownStack(nn.Module):
+ """
+ Downsampling stack from Impala CNN.
+ :param inchan: number of input channels
+ :param nblock: number of residual blocks after downsampling
+ :param outchan: number of output channels
+ :param init_scale: weight init scale multiplier
+ :param pool: if true, downsample with max pool
+ :param post_pool_groups: if not None, normalize with group norm with this many groups
+ :param kwargs: remaining kwargs are passed into the blocks and layers
+ """
+
+ name = "Impala_CnnDownStack"
+
+ def __init__(
+ self,
+ inchan: int,
+ nblock: int,
+ outchan: int,
+ init_scale: float = 1,
+ pool: bool = True,
+ post_pool_groups: Optional[int] = None,
+ log_scope: str = "",
+ init_norm_kwargs: Dict = {},
+ first_conv_norm=False,
+ **kwargs,
+ ):
+ super().__init__()
+ self.inchan = inchan
+ self.outchan = outchan
+ self.pool = pool
+ first_conv_init_kwargs = deepcopy(init_norm_kwargs)
+ if not first_conv_norm:
+ first_conv_init_kwargs["group_norm_groups"] = None
+ first_conv_init_kwargs["batch_norm"] = False
+ self.firstconv = FanInInitReLULayer(
+ inchan,
+ outchan,
+ kernel_size=3,
+ padding=1,
+ log_scope=f"{log_scope}/firstconv",
+ **first_conv_init_kwargs,
+ )
+ self.post_pool_groups = post_pool_groups
+ if post_pool_groups is not None:
+ self.n = nn.GroupNorm(post_pool_groups, outchan)
+ self.blocks = nn.ModuleList(
+ [
+ CnnBasicBlock(
+ outchan,
+ init_scale=init_scale / math.sqrt(nblock),
+ log_scope=f"{log_scope}/block{i}",
+ init_norm_kwargs=init_norm_kwargs,
+ **kwargs,
+ )
+ for i in range(nblock)
+ ]
+ )
+
+ def forward(self, x):
+ x = self.firstconv(x)
+ if self.pool:
+ x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
+ if self.post_pool_groups is not None:
+ x = self.n(x)
+ x = tu.sequential(self.blocks, x, diag_name=self.name)
+ return x
+
+ def output_shape(self, inshape):
+ c, h, w = inshape
+ assert c == self.inchan
+ if self.pool:
+ return (self.outchan, (h + 1) // 2, (w + 1) // 2)
+ else:
+ return (self.outchan, h, w)
+
+
+class ImpalaCNN(nn.Module):
+ """
+ :param inshape: input image shape (height, width, channels)
+ :param chans: number of residual downsample stacks. Each element is the number of
+ filters per convolution in the stack
+ :param outsize: output hidden size
+ :param nblock: number of residual blocks per stack. Each block has 2 convs and a residual
+ :param init_norm_kwargs: arguments to be passed to convolutional layers. Options can be found
+ in lib.util:FanInInitReLULayer
+ :param dense_init_norm_kwargs: arguments to be passed to the final dense layer. Options can be found
+ in lib.util:FanInInitReLULayer
+ :param kwargs: remaining kwargs are passed into the CnnDownStacks
+ """
+
+ name = "ImpalaCNN"
+
+ def __init__(
+ self,
+ inshape: List[int],
+ chans: List[int],
+ outsize: int,
+ nblock: int,
+ init_norm_kwargs: Dict = {},
+ dense_init_norm_kwargs: Dict = {},
+ first_conv_norm=False,
+ **kwargs,
+ ):
+ super().__init__()
+ h, w, c = inshape
+ curshape = (c, h, w)
+ self.stacks = nn.ModuleList()
+ for i, outchan in enumerate(chans):
+ stack = CnnDownStack(
+ curshape[0],
+ nblock=nblock,
+ outchan=outchan,
+ init_scale=math.sqrt(len(chans)),
+ log_scope=f"downstack{i}",
+ init_norm_kwargs=init_norm_kwargs,
+ first_conv_norm=first_conv_norm if i == 0 else True,
+ **kwargs,
+ )
+ self.stacks.append(stack)
+ curshape = stack.output_shape(curshape)
+
+ self.dense = FanInInitReLULayer(
+ misc.intprod(curshape),
+ outsize,
+ layer_type="linear",
+ log_scope="imapala_final_dense",
+ init_scale=1.4,
+ **dense_init_norm_kwargs,
+ )
+ self.outsize = outsize
+
+ def forward(self, x):
+ b, t = x.shape[:-3]
+ x = x.reshape(b * t, *x.shape[-3:])
+ x = misc.transpose(x, "bhwc", "bchw")
+ x = tu.sequential(self.stacks, x, diag_name=self.name)
+ x = x.reshape(b, t, *x.shape[1:])
+ x = tu.flatten_image(x)
+ x = self.dense(x)
+ return x
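+
+
+if __name__ == "__main__":
+    # Minimal shape check (illustrative only; these channel counts are a small example
+    # configuration, not the exact VPT one). Input layout is (batch, time, H, W, C).
+    import torch as th
+
+    net = ImpalaCNN(
+        inshape=[128, 128, 3],
+        chans=[16, 32, 32],
+        outsize=256,
+        nblock=2,
+    )
+    x = th.zeros(2, 4, 128, 128, 3)
+    print(net(x).shape)  # expected: torch.Size([2, 4, 256])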
diff --git a/docs/src/lib/masked_attention.py b/docs/src/lib/masked_attention.py
new file mode 100644
index 0000000..02d224f
--- /dev/null
+++ b/docs/src/lib/masked_attention.py
@@ -0,0 +1,182 @@
+import functools
+
+import torch as th
+from torch import nn
+
+import lib.xf as xf
+from lib.minecraft_util import store_args
+from lib.tree_util import tree_map
+
+
+@functools.lru_cache()
+def get_band_diagonal_mask(t: int, T: int, maxlen: int, batchsize: int, device: th.device) -> th.Tensor:
+ """Returns a band diagonal mask which is causal (upper triangle is masked)
+ and such that any frame can only view up to maxlen total past frames
+ including the current frame.
+
+ Example Masks: Here 0 means that the frame is masked out; we mask it by adding a huge number to the attention logits (see lib/xf.py)
+ t = 3, T = 3, maxlen = 3
+ T
+ t 1 0 0 | mask out T > t
+ 1 1 0 |
+ 1 1 1 |
+ t = 3, T = 6, maxlen = 3
+ t 0 1 1 1 0 0 | mask out T > t
+ 0 0 1 1 1 0 |
+ 0 0 0 1 1 1 |
+
+ Args:
+ t: number of rows (presumably the number of frames receiving gradient)
+ T: number of cols (presumably t + past context that isn't being gradient updated)
+ maxlen: maximum number of frames (including current frame) any frame can attend to
+ batchsize: number of masks to return
+ device: torch device to place mask on
+
+ Returns:
+ Boolean mask of shape (batchsize, t, T)
+ """
+ m = th.ones(t, T, dtype=bool)
+ m.tril_(T - t) # Mask out upper triangle
+ if maxlen is not None and maxlen < T: # Mask out lower triangle
+ m.triu_(T - t - maxlen + 1)
+ m_btT = m[None].repeat_interleave(batchsize, dim=0)
+ m_btT = m_btT.to(device=device)
+ return m_btT
+
+
+def get_mask(first_b11: th.Tensor, state_mask: th.Tensor, t: int, T: int, maxlen: int, heads: int, device) -> th.Tensor:
+ """Returns a band diagonal mask that respects masking past states (columns 0:T-t inclusive)
+ if first_b11 is True. See get_band_diagonal_mask for how the base mask is computed.
+ This function takes that mask and first zeros out any past context if first_b11 is True.
+
+ Say our context is in chunks of length t (so here T = 4t). We see that in the second batch we received first=True
+ context t t t t
+ first F T F F
+ Now, given this the mask should mask out anything prior to T < t; however since we don't have access to the past first_b11's
+ we need to keep a state of the mask at those past timesteps. This is what state_mask is.
+
+ In particular state_mask is a [b, t, T - t] mask matrix that contains the mask for the past T - t frames.
+
+ Args: (See get_band_diagonal_mask for remaining args)
+ first_b11: boolean tensor with shape [batchsize, 1, 1] indicating if the first timestep for each batch element had first=True
+ state_mask: mask tensor of shape [b, t, T - t]
+ t: number of mask rows (presumably number of frames for which we take gradient)
+ T: number of mask columns (t + the number of past frames we keep in context)
+ maxlen: actual context length
+ heads: number of attention heads
+ device: torch device
+
+ Returns:
+ m_btT: Boolean mask of shape (batchsize * heads, t, T)
+ state_mask: updated state_mask
+ """
+ b = first_b11.shape[0]
+
+ if state_mask is None:
+ state_mask = th.zeros((b, 1, T - t), dtype=bool, device=device)
+
+ m_btT = get_band_diagonal_mask(t, T, maxlen, b, device).clone() # Should be shape B, t, T
+ not_first = ~first_b11.to(device=device)
+ m_btT[:, :, :-t] &= not_first # Zero out anything in the past if first is true
+ m_btT[:, :, :-t] &= state_mask
+ m_bhtT = m_btT[:, None].repeat_interleave(heads, dim=1)
+ m_btT = m_bhtT.reshape((b * heads), t, T)
+
+ # Update state_mask such that it reflects the most recent first
+ state_mask = th.cat(
+ [
+ state_mask[:, :, t:] & not_first,
+ th.ones((b, 1, min(t, T - t)), dtype=bool, device=device),
+ ],
+ dim=-1,
+ )
+
+ return m_btT, state_mask
+
+
+class MaskedAttention(nn.Module):
+ """
+ Transformer self-attention layer that removes frames from previous episodes from the hidden state under certain constraints.
+
+ The constraints are:
+ - The "first" flag can only be true for the first timestep of each batch. An assert will fire if other timesteps have first = True.
+
+ input_size: The dimension of the input (which also happens to be the size of the output)
+ memory_size: The number of frames to keep in the inner state. Note that when attending, we will be able to attend
+ to both the frames in the inner state (which presumably won't have gradients anymore) and the frames
+ in the batch. "mask" for some additional considerations on this.
+ heads: The number of attention heads to use. Note that we will split the input into this number of heads, so
+ input_size needs to be divisible by heads.
+ timesteps: number of timesteps with which we'll be taking gradient
+ mask: Can be "none" or "clipped_causal". "clipped_causal" is a normal causal mask but solves the following minor problem:
+ if you have a state of length 128 and a batch of 128 frames, then the first frame of your batch will be able to
+ attend to 128 previous frames, but the last one will be able to attend to 255 previous frames. In this example,
+ "clipped_causal" will make it so that the last frame can only attend to 128 previous frames, so that there is no
+ bias coming from the position in the batch. None simply allows you to attend to any frame in the state + batch,
+ which means you can also attend to future frames.
+ """
+
+ @store_args
+ def __init__(
+ self,
+ input_size,
+ memory_size: int,
+ heads: int,
+ timesteps: int,
+ mask: str = "clipped_causal",
+ init_scale=1,
+ norm="none",
+ log_scope="sa",
+ use_muP_factor=False,
+ ):
+ super().__init__()
+
+ assert mask in {"none", "clipped_causal"}
+ assert memory_size >= 0
+
+ self.maxlen = memory_size - timesteps
+ if mask == "none":
+ mask = None
+
+ self.orc_attn = xf.All2All(heads, self.maxlen, mask=mask is not None)
+ self.orc_block = xf.SelfAttentionLayer(
+ input_size,
+ self.orc_attn,
+ scale=init_scale,
+ relattn=True,
+ cache_keep_len=self.maxlen,
+ norm=norm,
+ log_scope=log_scope,
+ use_muP_factor=use_muP_factor,
+ )
+
+ def initial_state(self, batchsize: int, device=None):
+ """Return the initial state mask (None) and the initial state of the transformer (zerod out keys and queries)"""
+ state = self.orc_block.initial_state(batchsize, initial_T=self.maxlen)
+ state_mask = None
+ if device is not None:
+ state = tree_map(lambda x: x.to(device), state)
+ return state_mask, state
+
+ def forward(self, input_bte, first_bt, state):
+ """Forward propagation of a single layer"""
+ state_mask, xf_state = state
+ t = first_bt.shape[1]
+ if self.mask == "clipped_causal":
+ new_mask, state_mask = get_mask(
+ first_b11=first_bt[:, [[0]]],
+ state_mask=state_mask,
+ t=t,
+ T=t + self.maxlen,
+ maxlen=self.maxlen,
+ heads=self.heads,
+ device=input_bte.device,
+ )
+ self.orc_block.attn.mask = new_mask
+ output, xf_state = self.orc_block(input_bte, xf_state)
+
+ return output, (state_mask, xf_state)
+
+ def get_log_keys(self):
+ # These are logged in xf.SelfAttentionLayer
+ return [f"activation_{stat}/{self.log_scope}/{k}" for k in ["K", "Q", "V", "A", "Aproj"] for stat in ["mean", "std"]]
diff --git a/docs/src/lib/minecraft_util.py b/docs/src/lib/minecraft_util.py
new file mode 100644
index 0000000..2b2d7a1
--- /dev/null
+++ b/docs/src/lib/minecraft_util.py
@@ -0,0 +1,88 @@
+import functools
+import inspect
+from typing import Optional, Tuple
+
+import numpy as np
+import torch
+
+from lib.action_head import (CategoricalActionHead, DiagGaussianActionHead,
+ DictActionHead)
+
+
+def store_args(method):
+ """Stores provided method args as instance attributes."""
+ argspec = inspect.getfullargspec(method)
+ defaults = {}
+ if argspec.defaults is not None:
+ defaults = dict(zip(argspec.args[-len(argspec.defaults) :], argspec.defaults))
+ if argspec.kwonlydefaults is not None:
+ defaults.update(argspec.kwonlydefaults)
+ arg_names = argspec.args[1:]
+
+ @functools.wraps(method)
+ def wrapper(*positional_args, **keyword_args):
+ self = positional_args[0]
+ # Get default arg values
+ args = defaults.copy()
+ # Add provided arg values
+ for name, value in zip(arg_names, positional_args[1:]):
+ args[name] = value
+ args.update(keyword_args)
+ self.__dict__.update(args)
+ return method(*positional_args, **keyword_args)
+
+ return wrapper
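+
+# Illustrative sketch (hypothetical class, not from this repo): @store_args saves
+# every constructor argument, including unprovided defaults, onto the instance
+# before the body of __init__ runs.
+#
+#   class Example:
+#       @store_args
+#       def __init__(self, a, b=2, *, c=3):
+#           pass
+#
+#   ex = Example(1, c=5)
+#   # ex.a == 1, ex.b == 2, ex.c == 5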
+
+
+def get_norm_entropy_from_cat_head(module, name, masks, logits):
+ # Note that the mask has already been applied to the logits at this point
+ entropy = -torch.sum(torch.exp(logits) * logits, dim=-1)
+ if name in masks:
+ n = torch.sum(masks[name], dim=-1, dtype=torch.float)
+ norm_entropy = entropy / torch.log(n)
+ # When the mask only allows one option the normalized entropy makes no sense
+ # as it is basically both maximal (the distribution is as uniform as it can be)
+ # and minimal (there is no variance at all).
+ # As such, we ignore them for the purpose of calculating entropy.
+ zero = torch.zeros_like(norm_entropy)
+ norm_entropy = torch.where(n.eq(1.0), zero, norm_entropy)
+ count = n.not_equal(1.0).int()
+ else:
+ n = torch.tensor(logits.shape[-1], dtype=torch.float)
+ norm_entropy = entropy / torch.log(n)
+ count = torch.ones_like(norm_entropy, dtype=torch.int)
+
+ # entropy is per-entry, still of size module.output_shape[:-1]; we need to reduce over the rest of it.
+ for _ in module.output_shape[:-1]:
+ norm_entropy = norm_entropy.sum(dim=-1)
+ count = count.sum(dim=-1)
+ return norm_entropy, count
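+
+# Worked note on the normalization above (illustrative numbers): a uniform
+# categorical over n allowed options has entropy log(n), so the normalized
+# entropy entropy / log(n) equals 1.0, while a fully peaked distribution gives
+# 0.0. Entries whose mask leaves only a single option (n == 1) are excluded,
+# since log(1) == 0 would make the ratio undefined.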
+
+
+def get_norm_cat_entropy(module, masks, logits, template) -> Tuple[torch.Tensor, torch.Tensor]:
+ entropy_sum = torch.zeros_like(template, dtype=torch.float)
+ counts = torch.zeros_like(template, dtype=torch.int)
+ for k, subhead in module.items():
+ if isinstance(subhead, DictActionHead):
+ entropy, count = get_norm_cat_entropy(subhead, masks, logits[k], template)
+ elif isinstance(subhead, CategoricalActionHead):
+ entropy, count = get_norm_entropy_from_cat_head(subhead, k, masks, logits[k])
+ else:
+ continue
+ entropy_sum += entropy
+ counts += count
+ return entropy_sum, counts
+
+
+def get_diag_guassian_entropy(module, logits, template) -> Optional[torch.Tensor]:
+ entropy_sum = torch.zeros_like(template, dtype=torch.float)
+ count = torch.zeros(1, device=template.device, dtype=torch.int)
+ for k, subhead in module.items():
+ if isinstance(subhead, DictActionHead):
+ entropy_sum += get_diag_guassian_entropy(subhead, logits[k], template)
+ elif isinstance(subhead, DiagGaussianActionHead):
+ entropy_sum += subhead.entropy(logits[k])
+ else:
+ continue
+ count += 1
+ return entropy_sum / count
diff --git a/docs/src/lib/misc.py b/docs/src/lib/misc.py
new file mode 100644
index 0000000..25ac90f
--- /dev/null
+++ b/docs/src/lib/misc.py
@@ -0,0 +1,263 @@
+import numpy as np
+import torch as th
+
+
+def intprod(xs):
+ """
+ Product of a sequence of integers
+ """
+ out = 1
+ for x in xs:
+ out *= x
+ return out
+
+
+def safezip(*args):
+ """
+ Check that lengths of sequences are the same, then zip them
+ """
+ args = [list(a) for a in args]
+ n = len(args[0])
+ for arg in args[1:]:
+ assert len(arg) == n, f"length mismatch: {list(map(len, args))}"
+ return list(zip(*args))
+
+
+def transpose(x, before, after):
+ """
+ Usage: x_bca = transpose(x_abc, 'abc', 'bca')
+ """
+ assert sorted(before) == sorted(after), f"cannot transpose {before} to {after}"
+ assert x.ndim == len(
+ before
+ ), f"before spec '{before}' has length {len(before)} but x has {x.ndim} dimensions: {tuple(x.shape)}"
+ return x.permute(tuple(before.index(i) for i in after))
+
+
+def transpose_undo(x, before, after, *, undo=None):
+ """
+ Usage:
+ x_bca, undo = transpose_undo(x_abc, 'abc', 'bca')
+ x_bca = fully_connected_layer(x_bca)
+ x_abc = undo(x_bca)
+ """
+ return (
+ transpose(x, before, after),
+ compose_undo(undo, lambda x: transpose(x, before=after, after=before)),
+ )
+
+
+def compose_undo(u1, u2):
+ assert u2 is not None
+ if u1 is None:
+ return u2
+
+ def u(x):
+ x = u2(x)
+ x = u1(x)
+ return x
+
+ return u
+
+
+NO_BIND = "__nobind"
+
+
+def _parse_reshape_str(s, kind):
+ assert kind in ("before", "after")
+ result = []
+ n_underscores = 0
+ for i, part in enumerate(s.split(",")):
+ part = part.strip()
+ if part == "?" and kind == "before":
+ result.append([f"__{i}"])
+ elif part == "_":
+ result.append([f"{NO_BIND}_{n_underscores}"])
+ n_underscores += 1
+ else:
+ result.append([term.strip() for term in part.split("*")])
+ return result
+
+
+def _infer_part(part, concrete_dim, known, index, full_shape):
+ if type(part) is int:
+ return part
+ assert isinstance(part, list), part
+ lits = []
+ syms = []
+ for term in part:
+ if type(term) is int:
+ lits.append(term)
+ elif type(term) is str:
+ syms.append(term)
+ else:
+ raise TypeError(f"got {type(term)} but expected int or str")
+ int_part = 1
+ for x in lits:
+ int_part *= x
+ if len(syms) == 0:
+ return int_part
+ elif len(syms) == 1 and concrete_dim is not None:
+ assert concrete_dim % int_part == 0, f"{concrete_dim} % {int_part} != 0 (at index {index}, full shape is {full_shape})"
+ v = concrete_dim // int_part
+ if syms[0] in known:
+ assert (
+ known[syms[0]] == v
+ ), f"known value for {syms[0]} is {known[syms[0]]} but found value {v} at index {index} (full shape is {full_shape})"
+ else:
+ known[syms[0]] = v
+ return concrete_dim
+ else:
+ for i in range(len(syms)):
+ if syms[i] in known:
+ syms[i] = known[syms[i]]
+ else:
+ try:
+ syms[i] = int(syms[i])
+ except ValueError:
+ pass
+ return lits + syms
+
+
+def _infer_step(args):
+ known, desc, shape = args
+ new_known = known.copy()
+ new_desc = desc.copy()
+ for i in range(len(desc)):
+ if shape is None:
+ concrete_dim = None
+ else:
+ concrete_dim = shape[i]
+ new_desc[i] = _infer_part(part=desc[i], concrete_dim=concrete_dim, known=new_known, index=i, full_shape=shape)
+ return new_known, new_desc, shape
+
+
+def _infer(known, desc, shape):
+ if shape is not None:
+ assert len(desc) == len(shape), f"desc has length {len(desc)} but shape has length {len(shape)} (shape={shape})"
+ known, desc, shape = fixed_point(_infer_step, (known, desc, shape))
+ return desc, known
+
+
+def fixed_point(f, x, eq=None):
+ if eq is None:
+ eq = lambda a, b: a == b
+ while True:
+ new_x = f(x)
+ if eq(x, new_x):
+ return x
+ else:
+ x = new_x
+
+
+def _infer_question_mark(x, total_product):
+ try:
+ question_mark_index = x.index(["?"])
+ except ValueError:
+ return x
+ observed_product = 1
+ for i in range(len(x)):
+ if i != question_mark_index:
+ assert type(x[i]) is int, f"when there is a question mark, there can be no other unknown values (full list: {x})"
+ observed_product *= x[i]
+ assert (
+ observed_product and total_product % observed_product == 0
+ ), f"{total_product} is not divisible by {observed_product}"
+ value = total_product // observed_product
+ x = x.copy()
+ x[question_mark_index] = value
+ return x
+
+
+def _ground(x, known, infer_question_mark_with=None):
+ x, known = _infer(known=known, desc=x, shape=None)
+ if infer_question_mark_with:
+ x = _infer_question_mark(x, infer_question_mark_with)
+ for part in x:
+ assert type(part) is int, f"cannot infer value of {part}"
+ return x
+
+
+def _handle_ellipsis(x, before, after):
+ ell = ["..."]
+ try:
+ i = before.index(ell)
+ l = len(x.shape) - len(before) + 1
+ ellipsis_value = x.shape[i : i + l]
+ ellipsis_value = list(ellipsis_value)
+ before = before[:i] + ellipsis_value + before[i + 1 :]
+ except ValueError:
+ pass
+ try:
+ i = after.index(ell)
+ after = after[:i] + ellipsis_value + after[i + 1 :]
+ except ValueError:
+ pass
+ except UnboundLocalError as e:
+ raise ValueError("there cannot be an ellipsis in 'after' unless there is an ellipsis in 'before'") from e
+ return before, after
+
+
+def reshape_undo(inp, before, after, *, undo=None, known=None, **kwargs):
+ """
+ Usage:
+ x_Bhwse, undo = reshape_undo(
+ x_bthwe,
+ 'b, t, ..., stride*e',
+ 'b*t, ..., stride, e',
+ stride=7
+ )
+ x_Bhwse = do_some_stuff(x_Bhwse)
+ x_bthwe = undo(x_Bhwse)
+
+ It's necessary to pass known values as keywords only
+ when they can't be inferred from the shape.
+
+ (Eg. in the above example we needed to pass
+ stride but not b, t, or e, since those can be determined from
+ inp.shape once stride is known.)
+ """
+ if known:
+ known = {**kwargs, **known}
+ else:
+ known = kwargs
+ assert type(before) is type(after), f"{type(before)} != {type(after)}"
+ assert isinstance(inp, (th.Tensor, np.ndarray)), f"require tensor or ndarray but got {type(inp)}"
+ assert isinstance(before, (str, list)), f"require str or list but got {type(before)}"
+ if isinstance(before, str):
+ before = _parse_reshape_str(before, "before")
+ after = _parse_reshape_str(after, "after")
+ before, after = _handle_ellipsis(inp, before, after)
+ before_saved, after_saved = before, after
+ before, known = _infer(known=known, desc=before, shape=inp.shape)
+ before = _ground(before, known, product(inp.shape))
+ after = _ground(after, known, product(inp.shape))
+ known = {k: v for k, v in known.items() if not k.startswith(NO_BIND)}
+ assert tuple(inp.shape) == tuple(before), f"expected shape {before} but got shape {inp.shape}"
+ assert product(inp.shape) == product(
+ after
+ ), f"cannot reshape {inp.shape} to {after} because the number of elements does not match"
+ return (
+ inp.reshape(after),
+ compose_undo(undo, lambda inp: reshape(inp, after_saved, before_saved, known=known)),
+ )
+
+
+def reshape(*args, **kwargs):
+ """
+ Please see the documentation for reshape_undo.
+ """
+ x, _ = reshape_undo(*args, **kwargs)
+ return x
+
+
+def product(xs, one=1):
+ result = one
+ for x in xs:
+ result = result * x
+ return result
+
+
+def exact_div(a, b):
+ assert a % b == 0, f"{a} is not divisible by {b}"
+ return a // b
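+
+# Illustrative sketch (shapes chosen for the example): the string-spec reshape
+# helpers above infer symbolic sizes from the input shape plus any keyword
+# bindings that cannot be inferred.
+#
+#   x = th.zeros(2, 3, 8)
+#   y, undo = reshape_undo(x, "b, t, s*e", "b*t, s, e", s=2)
+#   # y.shape == (6, 2, 4); undo(y).shape == (2, 3, 8)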
diff --git a/docs/src/lib/mlp.py b/docs/src/lib/mlp.py
new file mode 100644
index 0000000..6ec01e2
--- /dev/null
+++ b/docs/src/lib/mlp.py
@@ -0,0 +1,31 @@
+import torch as th
+from torch import nn
+
+from lib import misc
+from lib import torch_util as tu
+
+
+class MLP(nn.Module):
+ def __init__(self, insize, nhidlayer, outsize, hidsize, hidactiv, dtype=th.float32):
+ super().__init__()
+ self.insize = insize
+ self.nhidlayer = nhidlayer
+ self.outsize = outsize
+ in_sizes = [insize] + [hidsize] * nhidlayer
+ out_sizes = [hidsize] * nhidlayer + [outsize]
+ self.layers = nn.ModuleList(
+ [tu.NormedLinear(insize, outsize, dtype=dtype) for (insize, outsize) in misc.safezip(in_sizes, out_sizes)]
+ )
+ self.hidactiv = hidactiv
+
+ def forward(self, x):
+ *hidlayers, finallayer = self.layers
+ for layer in hidlayers:
+ x = layer(x)
+ x = self.hidactiv(x)
+ x = finallayer(x)
+ return x
+
+ @property
+ def output_shape(self):
+ return (self.outsize,)
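+
+# Illustrative usage sketch (sizes chosen for the example):
+#
+#   net = MLP(insize=64, nhidlayer=2, outsize=10, hidsize=128, hidactiv=th.relu)
+#   y = net(th.zeros(32, 64))   # y.shape == (32, 10)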
diff --git a/docs/src/lib/normalize_ewma.py b/docs/src/lib/normalize_ewma.py
new file mode 100644
index 0000000..815e241
--- /dev/null
+++ b/docs/src/lib/normalize_ewma.py
@@ -0,0 +1,60 @@
+import numpy as np
+import torch
+import torch.nn as nn
+
+
+class NormalizeEwma(nn.Module):
+ """Normalize a vector of observations - across the first norm_axes dimensions"""
+
+ def __init__(self, input_shape, norm_axes=2, beta=0.99999, per_element_update=False, epsilon=1e-5):
+ super().__init__()
+
+ self.input_shape = input_shape
+ self.norm_axes = norm_axes
+ self.epsilon = epsilon
+ self.beta = beta
+ self.per_element_update = per_element_update
+
+ self.running_mean = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)
+ self.running_mean_sq = nn.Parameter(torch.zeros(input_shape, dtype=torch.float), requires_grad=False)
+ self.debiasing_term = nn.Parameter(torch.tensor(0.0, dtype=torch.float), requires_grad=False)
+
+ def reset_parameters(self):
+ self.running_mean.zero_()
+ self.running_mean_sq.zero_()
+ self.debiasing_term.zero_()
+
+ def running_mean_var(self):
+ debiased_mean = self.running_mean / self.debiasing_term.clamp(min=self.epsilon)
+ debiased_mean_sq = self.running_mean_sq / self.debiasing_term.clamp(min=self.epsilon)
+ debiased_var = (debiased_mean_sq - debiased_mean ** 2).clamp(min=1e-2)
+ return debiased_mean, debiased_var
+
+ def forward(self, input_vector):
+ # Make sure input is float32
+ input_vector = input_vector.to(torch.float)
+
+ if self.training:
+ # Detach input before adding it to running means to avoid backpropping through it on
+ # subsequent batches.
+ detached_input = input_vector.detach()
+ batch_mean = detached_input.mean(dim=tuple(range(self.norm_axes)))
+ batch_sq_mean = (detached_input ** 2).mean(dim=tuple(range(self.norm_axes)))
+
+ if self.per_element_update:
+ batch_size = np.prod(detached_input.size()[: self.norm_axes])
+ weight = self.beta ** batch_size
+ else:
+ weight = self.beta
+
+ self.running_mean.mul_(weight).add_(batch_mean * (1.0 - weight))
+ self.running_mean_sq.mul_(weight).add_(batch_sq_mean * (1.0 - weight))
+ self.debiasing_term.mul_(weight).add_(1.0 * (1.0 - weight))
+
+ mean, var = self.running_mean_var()
+ return (input_vector - mean[(None,) * self.norm_axes]) / torch.sqrt(var)[(None,) * self.norm_axes]
+
+ def denormalize(self, input_vector):
+ """Transform normalized data back into original distribution"""
+ mean, var = self.running_mean_var()
+ return input_vector * torch.sqrt(var)[(None,) * self.norm_axes] + mean[(None,) * self.norm_axes]
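+
+# Illustrative usage sketch: in training mode the running statistics are updated
+# from each batch, and denormalize() maps values back to the original scale.
+# Shapes below are assumptions for the example (norm_axes=2 means the first two
+# dimensions, e.g. batch and time, are averaged over).
+#
+#   norm = NormalizeEwma(input_shape=(1,))
+#   x = torch.randn(8, 16, 1) * 5.0 + 3.0
+#   y = norm(x)                      # approximately zero mean, unit variance
+#   x_back = norm.denormalize(y)     # recovers x up to numerical error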
diff --git a/docs/src/lib/policy.py b/docs/src/lib/policy.py
new file mode 100644
index 0000000..b517a08
--- /dev/null
+++ b/docs/src/lib/policy.py
@@ -0,0 +1,467 @@
+from copy import deepcopy
+from typing import Dict, Optional
+
+import numpy as np
+import torch as th
+from gym3.types import DictType
+from torch import nn
+from torch.nn import functional as F
+
+from lib.action_head import make_action_head
+from lib.action_mapping import CameraHierarchicalMapping
+from lib.impala_cnn import ImpalaCNN
+from lib.normalize_ewma import NormalizeEwma
+from lib.scaled_mse_head import ScaledMSEHead
+from lib.tree_util import tree_map
+from lib.util import FanInInitReLULayer, ResidualRecurrentBlocks
+from lib.misc import transpose
+
+
+class ImgPreprocessing(nn.Module):
+ """Normalize incoming images.
+
+ :param img_statistics: remote path to npz file with a mean and std image. If specified
+ normalize images using this.
+ :param scale_img: If true and img_statistics not specified, scale incoming images by 1/255.
+ """
+
+ def __init__(self, img_statistics: Optional[str] = None, scale_img: bool = True):
+ super().__init__()
+ self.img_mean = None
+ if img_statistics is not None:
+ img_statistics = dict(**np.load(img_statistics))
+ self.img_mean = nn.Parameter(th.Tensor(img_statistics["mean"]), requires_grad=False)
+ self.img_std = nn.Parameter(th.Tensor(img_statistics["std"]), requires_grad=False)
+ else:
+ self.ob_scale = 255.0 if scale_img else 1.0
+
+ def forward(self, img):
+ x = img.to(dtype=th.float32)
+ if self.img_mean is not None:
+ x = (x - self.img_mean) / self.img_std
+ else:
+ x = x / self.ob_scale
+ return x
+
+
+class ImgObsProcess(nn.Module):
+ """ImpalaCNN followed by a linear layer.
+
+ :param cnn_outsize: impala output dimension
+ :param output_size: output size of the linear layer.
+ :param dense_init_norm_kwargs: kwargs for linear FanInInitReLULayer
+ :param init_norm_kwargs: kwargs for 2d and 3d conv FanInInitReLULayer
+ """
+
+ def __init__(
+ self,
+ cnn_outsize: int,
+ output_size: int,
+ dense_init_norm_kwargs: Dict = {},
+ init_norm_kwargs: Dict = {},
+ **kwargs,
+ ):
+ super().__init__()
+ self.cnn = ImpalaCNN(
+ outsize=cnn_outsize,
+ init_norm_kwargs=init_norm_kwargs,
+ dense_init_norm_kwargs=dense_init_norm_kwargs,
+ **kwargs,
+ )
+ self.linear = FanInInitReLULayer(
+ cnn_outsize,
+ output_size,
+ layer_type="linear",
+ **dense_init_norm_kwargs,
+ )
+
+ def forward(self, img):
+ return self.linear(self.cnn(img))
+
+
+class MinecraftPolicy(nn.Module):
+ """
+ :param recurrence_type:
+ None - No recurrence, adds no extra layers
+ lstm - (Deprecated). Single LSTM
+ multi_layer_lstm - Multi-layer LSTM. Uses n_recurrence_layers to determine the number of consecutive LSTMs.
+ Does NOT support ragged batching
+ multi_masked_lstm - Multi-layer LSTM that supports ragged batching via the first vector. This model is slower.
+ Uses n_recurrence_layers to determine the number of consecutive LSTMs
+ transformer - Dense transformer
+ :param init_norm_kwargs: kwargs for all FanInInitReLULayers.
+ """
+
+ def __init__(
+ self,
+ recurrence_type="lstm",
+ impala_width=1,
+ impala_chans=(16, 32, 32),
+ obs_processing_width=256,
+ hidsize=512,
+ single_output=False, # True if we don't need separate outputs for action/value outputs
+ img_shape=None,
+ scale_input_img=True,
+ only_img_input=False,
+ init_norm_kwargs={},
+ impala_kwargs={},
+ # Unused argument assumed by forc.
+ input_shape=None, # pylint: disable=unused-argument
+ active_reward_monitors=None,
+ img_statistics=None,
+ first_conv_norm=False,
+ diff_mlp_embedding=False,
+ attention_mask_style="clipped_causal",
+ attention_heads=8,
+ attention_memory_size=2048,
+ use_pointwise_layer=True,
+ pointwise_ratio=4,
+ pointwise_use_activation=False,
+ n_recurrence_layers=1,
+ recurrence_is_residual=True,
+ timesteps=None,
+ use_pre_lstm_ln=True, # Not needed for transformer
+ **unused_kwargs,
+ ):
+ super().__init__()
+ assert recurrence_type in [
+ "multi_layer_lstm",
+ "multi_layer_bilstm",
+ "multi_masked_lstm",
+ "transformer",
+ "none",
+ ]
+
+ active_reward_monitors = active_reward_monitors or {}
+
+ self.single_output = single_output
+
+ chans = tuple(int(impala_width * c) for c in impala_chans)
+ self.hidsize = hidsize
+
+ # Dense init kwargs replaces batchnorm/groupnorm with layernorm
+ self.init_norm_kwargs = init_norm_kwargs
+ self.dense_init_norm_kwargs = deepcopy(init_norm_kwargs)
+ if self.dense_init_norm_kwargs.get("group_norm_groups", None) is not None:
+ self.dense_init_norm_kwargs.pop("group_norm_groups", None)
+ self.dense_init_norm_kwargs["layer_norm"] = True
+ if self.dense_init_norm_kwargs.get("batch_norm", False):
+ self.dense_init_norm_kwargs.pop("batch_norm", False)
+ self.dense_init_norm_kwargs["layer_norm"] = True
+
+ # Setup inputs
+ self.img_preprocess = ImgPreprocessing(img_statistics=img_statistics, scale_img=scale_input_img)
+ self.img_process = ImgObsProcess(
+ cnn_outsize=256,
+ output_size=hidsize,
+ inshape=img_shape,
+ chans=chans,
+ nblock=2,
+ dense_init_norm_kwargs=self.dense_init_norm_kwargs,
+ init_norm_kwargs=init_norm_kwargs,
+ first_conv_norm=first_conv_norm,
+ **impala_kwargs,
+ )
+
+ self.pre_lstm_ln = nn.LayerNorm(hidsize) if use_pre_lstm_ln else None
+ self.diff_obs_process = None
+
+ self.recurrence_type = recurrence_type
+
+ self.recurrent_layer = None
+ self.recurrent_layer = ResidualRecurrentBlocks(
+ hidsize=hidsize,
+ timesteps=timesteps,
+ recurrence_type=recurrence_type,
+ is_residual=recurrence_is_residual,
+ use_pointwise_layer=use_pointwise_layer,
+ pointwise_ratio=pointwise_ratio,
+ pointwise_use_activation=pointwise_use_activation,
+ attention_mask_style=attention_mask_style,
+ attention_heads=attention_heads,
+ attention_memory_size=attention_memory_size,
+ n_block=n_recurrence_layers,
+ )
+
+ self.lastlayer = FanInInitReLULayer(hidsize, hidsize, layer_type="linear", **self.dense_init_norm_kwargs)
+ self.final_ln = th.nn.LayerNorm(hidsize)
+
+ def output_latent_size(self):
+ return self.hidsize
+
+ def forward(self, ob, state_in, context):
+ first = context["first"]
+
+ x = self.img_preprocess(ob["img"])
+ x = self.img_process(x)
+
+ if self.diff_obs_process:
+ processed_obs = self.diff_obs_process(ob["diff_goal"])
+ x = processed_obs + x
+
+ if self.pre_lstm_ln is not None:
+ x = self.pre_lstm_ln(x)
+
+ if self.recurrent_layer is not None:
+ x, state_out = self.recurrent_layer(x, first, state_in)
+ else:
+ state_out = state_in
+
+ x = F.relu(x, inplace=False)
+
+ x = self.lastlayer(x)
+ x = self.final_ln(x)
+ pi_latent = vf_latent = x
+ if self.single_output:
+ return pi_latent, state_out
+ return (pi_latent, vf_latent), state_out
+
+ def initial_state(self, batchsize):
+ if self.recurrent_layer:
+ return self.recurrent_layer.initial_state(batchsize)
+ else:
+ return None
+
+
+class MinecraftAgentPolicy(nn.Module):
+ def __init__(self, action_space, policy_kwargs, pi_head_kwargs):
+ super().__init__()
+ self.net = MinecraftPolicy(**policy_kwargs)
+
+ self.action_space = action_space
+
+ self.value_head = self.make_value_head(self.net.output_latent_size())
+ self.pi_head = self.make_action_head(self.net.output_latent_size(), **pi_head_kwargs)
+
+ def make_value_head(self, v_out_size: int, norm_type: str = "ewma", norm_kwargs: Optional[Dict] = None):
+ return ScaledMSEHead(v_out_size, 1, norm_type=norm_type, norm_kwargs=norm_kwargs)
+
+ def make_action_head(self, pi_out_size: int, **pi_head_opts):
+ return make_action_head(self.action_space, pi_out_size, **pi_head_opts)
+
+ def initial_state(self, batch_size: int):
+ return self.net.initial_state(batch_size)
+
+ def reset_parameters(self):
+ super().reset_parameters()
+ self.net.reset_parameters()
+ self.pi_head.reset_parameters()
+ self.value_head.reset_parameters()
+
+ def forward(self, obs, first: th.Tensor, state_in):
+ if isinstance(obs, dict):
+ # We don't want to mutate the obs input.
+ obs = obs.copy()
+
+ # If the special "mask" key is in obs, it is used for masking the logits.
+ # We take it out here (the network doesn't need it).
+ mask = obs.pop("mask", None)
+ else:
+ mask = None
+
+ (pi_h, v_h), state_out = self.net(obs, state_in, context={"first": first})
+
+ pi_logits = self.pi_head(pi_h, mask=mask)
+ vpred = self.value_head(v_h)
+
+ return (pi_logits, vpred, None), state_out
+
+ def get_logprob_of_action(self, pd, action):
+ """
+ Get logprob of taking action `action` given probability distribution
+ (see `get_output_for_observation` to get this distribution)
+ """
+ ac = tree_map(lambda x: x.unsqueeze(1), action)
+ log_prob = self.pi_head.logprob(ac, pd)
+ assert not th.isnan(log_prob).any()
+ return log_prob[:, 0]
+
+ def get_kl_of_action_dists(self, pd1, pd2):
+ """
+ Get the KL divergence between two action probability distributions
+ """
+ return self.pi_head.kl_divergence(pd1, pd2)
+
+ def get_output_for_observation(self, obs, state_in, first):
+ """
+ Return gradient-enabled outputs for given observation.
+
+ Use `get_logprob_of_action` to get log probability of action
+ with the given probability distribution.
+
+ Returns:
+ - probability distribution given observation
+ - value prediction for given observation
+ - new state
+ """
+ # We need to add a fictitious time dimension everywhere
+ obs = tree_map(lambda x: x.unsqueeze(1), obs)
+ first = first.unsqueeze(1)
+
+ (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)
+
+ return pd, self.value_head.denormalize(vpred)[:, 0], state_out
+
+ @th.no_grad()
+ def act(self, obs, first, state_in, stochastic: bool = True, taken_action=None, return_pd=False):
+ # We need to add a fictitious time dimension everywhere
+ obs = tree_map(lambda x: x.unsqueeze(1), obs)
+ first = first.unsqueeze(1)
+
+ (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)
+
+ if taken_action is None:
+ ac = self.pi_head.sample(pd, deterministic=not stochastic)
+ else:
+ ac = tree_map(lambda x: x.unsqueeze(1), taken_action)
+ log_prob = self.pi_head.logprob(ac, pd)
+ assert not th.isnan(log_prob).any()
+
+ # After unsqueezing, squeeze back to remove fictitious time dimension
+ result = {"log_prob": log_prob[:, 0], "vpred": self.value_head.denormalize(vpred)[:, 0]}
+ if return_pd:
+ result["pd"] = tree_map(lambda x: x[:, 0], pd)
+ ac = tree_map(lambda x: x[:, 0], ac)
+
+ return ac, state_out, result
+
+ @th.no_grad()
+ def v(self, obs, first, state_in):
+ """Predict value for a given mdp observation"""
+ obs = tree_map(lambda x: x.unsqueeze(1), obs)
+ first = first.unsqueeze(1)
+
+ (pd, vpred, _), state_out = self(obs=obs, first=first, state_in=state_in)
+
+ # After unsqueezing, squeeze back
+ return self.value_head.denormalize(vpred)[:, 0]
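+
+# Illustrative rollout sketch (constructor kwargs and observation shapes are
+# assumptions for the example, not values fixed by this file): act() consumes
+# one frame per call and threads the recurrent state through.
+#
+#   policy = MinecraftAgentPolicy(action_space, policy_kwargs, pi_head_kwargs)
+#   state = policy.initial_state(batch_size=1)
+#   first = th.ones(1, dtype=th.bool)          # episode-start flag
+#   obs = {"img": th.zeros(1, 128, 128, 3)}    # single frame, no time dimension
+#   action, state, info = policy.act(obs, first, state)
+#   # info["log_prob"] and info["vpred"] have shape (1,)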
+
+
+class InverseActionNet(MinecraftPolicy):
+ """
+ Args:
+ conv3d_params: params for the 3D CNN that runs before the Impala stack. They are passed directly into th.nn.Conv3d.
+ """
+
+ def __init__(
+ self,
+ hidsize=512,
+ conv3d_params=None,
+ **MCPolicy_kwargs,
+ ):
+ super().__init__(
+ hidsize=hidsize,
+ # If we're using a 3D conv, normalize the entire Impala stack; otherwise don't
+ # normalize the first Impala layer, since we already normalize the input.
+ first_conv_norm=conv3d_params is not None,
+ **MCPolicy_kwargs,
+ )
+ self.conv3d_layer = None
+ if conv3d_params is not None:
+ # 3D conv is the first layer, so don't normalize its input
+ conv3d_init_params = deepcopy(self.init_norm_kwargs)
+ conv3d_init_params["group_norm_groups"] = None
+ conv3d_init_params["batch_norm"] = False
+ self.conv3d_layer = FanInInitReLULayer(
+ layer_type="conv3d",
+ log_scope="3d_conv",
+ **conv3d_params,
+ **conv3d_init_params,
+ )
+
+ def forward(self, ob, state_in, context):
+ first = context["first"]
+ x = self.img_preprocess(ob["img"])
+
+ # Conv3D Prior to Impala
+ if self.conv3d_layer is not None:
+ x = self._conv3d_forward(x)
+
+ # Impala Stack
+ x = self.img_process(x)
+
+ if self.recurrent_layer is not None:
+ x, state_out = self.recurrent_layer(x, first, state_in)
+
+ x = F.relu(x, inplace=False)
+
+ pi_latent = self.lastlayer(x)
+ pi_latent = self.final_ln(x)
+ return (pi_latent, None), state_out
+
+ def _conv3d_forward(self, x):
+ # Convert from (B, T, H, W, C) -> (B, C, T, H, W)
+ x = transpose(x, "bthwc", "bcthw")
+ new_x = []
+ for mini_batch in th.split(x, 1):
+ new_x.append(self.conv3d_layer(mini_batch))
+ x = th.cat(new_x)
+ # Convert back
+ x = transpose(x, "bcthw", "bthwc")
+ return x
+
+
+class InverseActionPolicy(nn.Module):
+ def __init__(
+ self,
+ action_space,
+ pi_head_kwargs=None,
+ idm_net_kwargs=None,
+ ):
+ super().__init__()
+ self.action_space = action_space
+
+ self.net = InverseActionNet(**idm_net_kwargs)
+
+ pi_out_size = self.net.output_latent_size()
+
+ pi_head_kwargs = {} if pi_head_kwargs is None else pi_head_kwargs
+
+ self.pi_head = self.make_action_head(pi_out_size=pi_out_size, **pi_head_kwargs)
+
+ def make_action_head(self, **kwargs):
+ return make_action_head(self.action_space, **kwargs)
+
+ def reset_parameters(self):
+ super().reset_parameters()
+ self.net.reset_parameters()
+ self.pi_head.reset_parameters()
+
+ def forward(self, obs, first: th.Tensor, state_in, **kwargs):
+ if isinstance(obs, dict):
+ # We don't want to mutate the obs input.
+ obs = obs.copy()
+
+ # If the special "mask" key is in obs, it is used for masking the logits.
+ # We take it out here (the network doesn't need it).
+ mask = obs.pop("mask", None)
+ else:
+ mask = None
+
+ (pi_h, _), state_out = self.net(obs, state_in=state_in, context={"first": first}, **kwargs)
+ pi_logits = self.pi_head(pi_h, mask=mask)
+ return (pi_logits, None, None), state_out
+
+ @th.no_grad()
+ def predict(
+ self,
+ obs,
+ deterministic: bool = True,
+ **kwargs,
+ ):
+ (pd, _, _), state_out = self(obs=obs, **kwargs)
+
+ ac = self.pi_head.sample(pd, deterministic=deterministic)
+ log_prob = self.pi_head.logprob(ac, pd)
+
+ assert not th.isnan(log_prob).any()
+
+ result = {"log_prob": log_prob, "pd": pd}
+
+ return ac, state_out, result
+
+ def initial_state(self, batch_size: int):
+ return self.net.initial_state(batch_size)
diff --git a/docs/src/lib/scaled_mse_head.py b/docs/src/lib/scaled_mse_head.py
new file mode 100644
index 0000000..e935355
--- /dev/null
+++ b/docs/src/lib/scaled_mse_head.py
@@ -0,0 +1,50 @@
+from typing import Dict, Optional
+
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.nn.init as init
+
+from lib.action_head import fan_in_linear
+from lib.normalize_ewma import NormalizeEwma
+
+
+class ScaledMSEHead(nn.Module):
+ """
+ Linear output layer that scales itself so that targets are always normalized to N(0, 1)
+ """
+
+ def __init__(
+ self, input_size: int, output_size: int, norm_type: Optional[str] = "ewma", norm_kwargs: Optional[Dict] = None
+ ):
+ super().__init__()
+ self.input_size = input_size
+ self.output_size = output_size
+ self.norm_type = norm_type
+
+ self.linear = nn.Linear(self.input_size, self.output_size)
+
+ norm_kwargs = {} if norm_kwargs is None else norm_kwargs
+ self.normalizer = NormalizeEwma(output_size, **norm_kwargs)
+
+ def reset_parameters(self):
+ init.orthogonal_(self.linear.weight)
+ fan_in_linear(self.linear)
+ self.normalizer.reset_parameters()
+
+ def forward(self, input_data):
+ return self.linear(input_data)
+
+ def loss(self, prediction, target):
+ """
+ Calculate the MSE loss between the prediction and a target.
+ The prediction is expected to be in the normalized space, while the target is
+ given in the original (denormalized) space; the loss is computed in the
+ normalized space.
+ """
+ return F.mse_loss(prediction, self.normalizer(target), reduction="mean")
+
+ def denormalize(self, input_data):
+ """Convert input value from a normalized space into the original one"""
+ return self.normalizer.denormalize(input_data)
+
+ def normalize(self, input_data):
+ return self.normalizer(input_data)
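+
+# Illustrative training-loop sketch: loss() normalizes the target with the EWMA
+# statistics, so predictions live in a normalized space and denormalize() maps
+# them back for reporting. `latents` and `returns` below are hypothetical tensors
+# chosen for the example.
+#
+#   head = ScaledMSEHead(input_size=512, output_size=1)
+#   vpred = head(latents)                  # normalized-space prediction
+#   loss = head.loss(vpred, returns)       # returns are in the original scale
+#   value_estimate = head.denormalize(vpred)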
diff --git a/docs/src/lib/torch_util.py b/docs/src/lib/torch_util.py
new file mode 100644
index 0000000..4346dc4
--- /dev/null
+++ b/docs/src/lib/torch_util.py
@@ -0,0 +1,200 @@
+import functools
+import itertools
+import math
+import os
+import pickle
+import re
+import subprocess
+import tempfile
+from contextlib import contextmanager
+from hashlib import md5, sha1
+
+import numpy as np
+import torch as th
+import torch.distributed as dist
+import torch.distributions as dis
+import torch.nn.functional as F
+from torch import nn
+
+import lib.tree_util as tree_util
+from lib import misc
+
+
+def contextmanager_to_decorator(cm):
+ def decorator(fn):
+ @functools.wraps(fn)
+ def newfn(*args, **kwargs):
+ with cm():
+ return fn(*args, **kwargs)
+
+ return newfn
+
+ return decorator
+
+
+def have_cuda():
+ return th.has_cuda
+
+
+def default_device_type():
+ return "cuda" if have_cuda() else "cpu"
+
+
+no_grad = contextmanager_to_decorator(th.no_grad)
+DEFAULT_DEVICE = th.device(type=default_device_type())
+
+
+def set_default_torch_device(device):
+ global DEFAULT_DEVICE
+ DEFAULT_DEVICE = th.device(device)
+
+
+def dev():
+ return DEFAULT_DEVICE
+
+
+def zeros(*args, **kwargs):
+ return th.zeros(*args, **kwargs, device=dev())
+
+
+def ones(*args, **kwargs):
+ return th.ones(*args, **kwargs, device=dev())
+
+
+def arange(*args, **kwargs):
+ return th.arange(*args, **kwargs, device=dev())
+
+
+def NormedLinear(*args, scale=1.0, dtype=th.float32, **kwargs):
+ """
+ nn.Linear but with normalized fan-in init
+ """
+ dtype = parse_dtype(dtype)
+ if dtype == th.float32:
+ out = nn.Linear(*args, **kwargs)
+ elif dtype == th.float16:
+ out = LinearF16(*args, **kwargs)
+ else:
+ raise ValueError(dtype)
+ out.weight.data *= scale / out.weight.norm(dim=1, p=2, keepdim=True)
+ if kwargs.get("bias", True):
+ out.bias.data *= 0
+ return out
+
+
+class LinearF16(nn.Linear):
+ def forward(self, x):
+ return F.linear(x, self.weight.half(), self.bias.half() if self.bias is not None else None)
+
+
+class LayerNormF16(nn.LayerNorm):
+ def forward(self, x):
+ return F.layer_norm(x, self.normalized_shape, self.weight.half(), self.bias.half(), self.eps)
+
+
+def LayerNorm(*args, dtype=th.float32, **kwargs):
+ dtype = parse_dtype(dtype)
+ if dtype == th.float32:
+ out = nn.LayerNorm(*args, **kwargs)
+ elif dtype == th.float16:
+ out = LayerNormF16(*args, **kwargs)
+ else:
+ raise ValueError(dtype)
+ out.weight.no_scale = True
+ return out
+
+
+def flatten_image(x):
+ """
+ Flattens last three dims
+ """
+ *batch_shape, h, w, c = x.shape
+ return x.reshape((*batch_shape, h * w * c))
+
+
+def sequential(layers, x, *args, diag_name=None, use_checkpoint=False):
+ for (i, layer) in enumerate(layers):
+ x = layer(x, *args)
+ return x
+
+
+@no_grad
+def load_average_with_metadata(paths, overrides):
+ n_models = len(paths)
+ model, metadata = load_with_metadata(paths[0], overrides=overrides)
+ for p in model.parameters():
+ p.mul_(1 / n_models)
+ for p in paths[1:]:
+ new_model, _ = load_with_metadata(p, overrides=overrides)
+ for (n1, p1), (n2, p2) in misc.safezip(model.named_parameters(), new_model.named_parameters()):
+ assert n1 == n2, f"names {n1} and {n2} don't match"
+ p1.add_(p2.mul_(1 / n_models))
+ return model, metadata
+
+
+def save_kwargs(fn):
+ """
+ This decorator passes through the user-provided kwargs and adds one more, called
+ save_kwargs, mapping to {"create_fn" : name_of_decorated_fn, "kwargs" : other_kwargs}
+
+ You put this decorator on a function that creates a pytorch module. This will
+ save the kwargs and the function that was used to create the module.
+ This lets us restore the model state later.
+ """
+
+ @functools.wraps(fn)
+ def wrapper(**kwargs):
+ if "save_kwargs" in kwargs:
+ return fn(**kwargs)
+ else:
+ sk = {**kwargs, "create_fn": f"{fn.__module__}:{fn.__name__}"}
+ return fn(save_kwargs=sk, **kwargs)
+
+ return wrapper
+
+
+def parse_dtype(x):
+ if isinstance(x, th.dtype):
+ return x
+ elif isinstance(x, str):
+ if x == "float32" or x == "float":
+ return th.float32
+ elif x == "float64" or x == "double":
+ return th.float64
+ elif x == "float16" or x == "half":
+ return th.float16
+ elif x == "uint8":
+ return th.uint8
+ elif x == "int8":
+ return th.int8
+ elif x == "int16" or x == "short":
+ return th.int16
+ elif x == "int32" or x == "int":
+ return th.int32
+ elif x == "int64" or x == "long":
+ return th.int64
+ elif x == "bool":
+ return th.bool
+ else:
+ raise ValueError(f"cannot parse {x} as a dtype")
+ else:
+ raise TypeError(f"cannot parse {type(x)} as dtype")
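+
+# Examples of the aliases accepted above (illustrative):
+#   parse_dtype("half") -> th.float16
+#   parse_dtype("long") -> th.int64
+#   parse_dtype(th.float32) -> th.float32   # dtypes pass through unchanged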
+
+
+def index(x, i):
+ """
+ Batched, broadcasting index of x along dimension i.ndim.
+
+ For example, if x has shape (1, 2, 3, 4, 5) and i has shape (1, 1, 3)
+ then the result has shape (1, 2, 3, 5) and each value in i must be between 0 and 3.
+ """
+ assert x.ndim >= i.ndim + 1
+ gather_dim = i.ndim
+ while i.ndim < x.ndim:
+ i = i.unsqueeze(-1)
+ expand_shape = list(x.shape)
+ expand_shape[gather_dim] = 1
+ i = i.expand(*expand_shape)
+ xi = th.gather(x, gather_dim, i)
+ assert xi.shape[gather_dim] == 1
+ return xi.squeeze(gather_dim)
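+
+# Illustrative example for index(): gather one entry per leading position.
+# Values below are assumptions chosen for the example.
+#
+#   x = th.arange(24).reshape(2, 3, 4)      # (B, T, K)
+#   i = th.tensor([[0, 1, 2], [3, 2, 1]])   # (B, T), values in [0, K)
+#   index(x, i)                             # shape (2, 3): x[b, t, i[b, t]]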
diff --git a/docs/src/lib/tree_util.py b/docs/src/lib/tree_util.py
new file mode 100644
index 0000000..96de9b3
--- /dev/null
+++ b/docs/src/lib/tree_util.py
@@ -0,0 +1,280 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Copied this from jax, made it self-contained
+# Currently just used for improved_checkpoint
+
+import collections
+import functools
+import itertools as it
+from collections.abc import Collection
+from typing import Dict, List, Optional
+
+
+def unzip2(xys):
+ xs = []
+ ys = []
+ for x, y in xys:
+ xs.append(x)
+ ys.append(y)
+ return tuple(xs), tuple(ys)
+
+
+def partial(fun, *args, **kwargs):
+ wrapped = functools.partial(fun, *args, **kwargs)
+ functools.update_wrapper(wrapped, fun)
+ wrapped._bound_args = args # pylint: disable=protected-access
+ return wrapped
+
+
+def safe_zip(*args: Collection) -> List[tuple]:
+ n = len(args[0])
+ for arg in args[1:]:
+ assert len(arg) == n, "length mismatch: {}".format(list(map(len, args)))
+ return list(zip(*args))
+
+
+def safe_map(f, *args):
+ args = list(map(list, args))
+ n = len(args[0])
+ for arg in args[1:]:
+ assert len(arg) == n, "length mismatch: {}".format(list(map(len, args)))
+ return list(map(f, *args))
+
+
+def tree_map(f, tree, treat_as_leaves: Optional[List] = None):
+ """Map a function over a pytree to produce a new pytree.
+
+ Args:
+ f: function to be applied at each leaf.
+ tree: a pytree to be mapped over.
+
+ Returns:
+ A new pytree with the same structure as `tree` but with the value at each
+ leaf given by `f(x)` where `x` is the value at the corresponding leaf in
+ `tree`.
+ """
+ if treat_as_leaves is None:
+ treat_as_leaves = []
+ node_type = node_types.get(type(tree))
+ if node_type and type(tree) not in treat_as_leaves:
+ children, node_spec = node_type.to_iterable(tree)
+ new_children = [tree_map(f, child, treat_as_leaves) for child in children]
+ return node_type.from_iterable(node_spec, new_children)
+ else:
+ return f(tree)
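+
+# Illustrative example: tree_map applies f to every leaf of a nested container
+# built from the registered node types (dict, list, tuple, ...).
+#
+#   tree_map(lambda v: v + 1, {"a": 1, "b": [2, 3]})
+#   # -> {"a": 2, "b": [3, 4]}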
+
+
+def tree_multimap(f, tree, *rest, treat_as_leaves: Optional[List] = None):
+ """Map a multi-input function over pytree args to produce a new pytree.
+
+ Args:
+ f: function that takes `1 + len(rest)` arguments, to be applied at the
+ corresponding leaves of the pytrees.
+ tree: a pytree to be mapped over, with each leaf providing the first
+ positional argument to `f`.
+ *rest: a tuple of pytrees, each with the same structure as `tree`.
+
+ Returns:
+ A new pytree with the same structure as `tree` but with the value at each
+ leaf given by `f(x, *xs)` where `x` is the value at the corresponding leaf
+ in `tree` and `xs` is the tuple of values at corresponding leaves in `rest`.
+ """
+
+ if treat_as_leaves is None:
+ treat_as_leaves = []
+ node_type = node_types.get(type(tree))
+ if node_type and type(tree) not in treat_as_leaves:
+ children, node_spec = node_type.to_iterable(tree)
+ all_children = [children]
+ for other_tree in rest:
+ other_children, other_node_data = node_type.to_iterable(other_tree)
+ if other_node_data != node_spec:
+ raise TypeError("Mismatch: {} != {}".format(other_node_data, node_spec))
+ all_children.append(other_children)
+
+ new_children = [tree_multimap(f, *xs, treat_as_leaves=treat_as_leaves) for xs in zip(*all_children)]
+ return node_type.from_iterable(node_spec, new_children)
+ else:
+ return f(tree, *rest)
+
+
+def prefix_multimap(f, treedef, tree, *rest):
+ """Like tree_multimap but only maps down through a tree prefix."""
+ if isinstance(treedef, PyLeaf):
+ return f(tree, *rest)
+ else:
+ node_type = node_types.get(type(tree))
+ if node_type != treedef.node_type:
+ raise TypeError("Mismatch: {} != {}".format(treedef.node_type, node_type))
+ children, node_data = node_type.to_iterable(tree)
+ if node_data != treedef.node_data:
+ raise TypeError("Mismatch: {} != {}".format(treedef.node_data, node_data))
+ all_children = [children]
+ for other_tree in rest:
+ other_children, other_node_data = node_type.to_iterable(other_tree)
+ if other_node_data != node_data:
+ raise TypeError("Mismatch: {} != {}".format(other_node_data, node_data))
+ all_children.append(other_children)
+ all_children = zip(*all_children)
+
+ new_children = [prefix_multimap(f, td, *xs) for td, xs in zip(treedef.children, all_children)]
+ return node_type.from_iterable(node_data, new_children)
+
+
+def walk_pytree(f_node, f_leaf, tree, treat_as_leaves: Optional[List] = None):
+ node_type = node_types.get(type(tree))
+ if treat_as_leaves is None:
+ treat_as_leaves = []
+
+ if node_type and type(tree) not in treat_as_leaves:
+ children, node_spec = node_type.to_iterable(tree)
+ proc_children, child_specs = unzip2([walk_pytree(f_node, f_leaf, child, treat_as_leaves) for child in children])
+ tree_def = PyTreeDef(node_type, node_spec, child_specs)
+ return f_node(proc_children), tree_def
+ else:
+ return f_leaf(tree), PyLeaf()
+
+
+def build_tree(treedef, xs):
+ if isinstance(treedef, PyLeaf):
+ return xs
+ else:
+ # We use 'iter' for clearer error messages
+ children = safe_map(build_tree, iter(treedef.children), iter(xs))
+ return treedef.node_type.from_iterable(treedef.node_data, children)
+
+
+def _tree_unflatten(xs, treedef):
+ if isinstance(treedef, PyLeaf):
+ return next(xs)
+ else:
+ children = safe_map(partial(_tree_unflatten, xs), treedef.children)
+ return treedef.node_type.from_iterable(treedef.node_data, children)
+
+
+def _num_leaves(treedef):
+ return 1 if isinstance(treedef, PyLeaf) else sum(safe_map(_num_leaves, treedef.children))
+
+
+def _nested_treedef(inner, outer):
+ # just used in tree_transpose error checking
+ if isinstance(outer, PyLeaf):
+ return inner
+ else:
+ children = safe_map(partial(_nested_treedef, inner), outer.children)
+ return PyTreeDef(outer.node_type, outer.node_data, tuple(children))
+
+
+class PyTreeDef(object):
+ def __init__(self, node_type, node_data, children):
+ self.node_type = node_type
+ self.node_data = node_data
+ self.children = children
+
+ def __repr__(self):
+ if self.node_data is None:
+ data_repr = ""
+ else:
+ data_repr = "[{}]".format(self.node_data)
+
+ return "PyTree({}{}, [{}])".format(self.node_type.name, data_repr, ",".join(safe_map(repr, self.children)))
+
+ def __hash__(self):
+ return hash((self.node_type, self.node_data, tuple(self.children)))
+
+ def __eq__(self, other):
+ if isinstance(other, PyLeaf):
+ return False
+ else:
+ return self.node_type == other.node_type and self.node_data == other.node_data and self.children == other.children
+
+ def __ne__(self, other):
+ return not self == other
+
+
+class PyLeaf(object):
+ def __repr__(self):
+ return "*"
+
+ def __eq__(self, other):
+ return isinstance(other, PyLeaf)
+
+
+class NodeType(object):
+ def __init__(self, name, to_iterable, from_iterable):
+ self.name = name
+ self.to_iterable = to_iterable
+ self.from_iterable = from_iterable
+
+
+node_types: Dict[type, NodeType] = {}
+
+
+def register_pytree_node(py_type, to_iterable, from_iterable):
+ assert py_type not in node_types
+ node_types[py_type] = NodeType(str(py_type), to_iterable, from_iterable)
+
+
+def tuple_to_iterable(xs):
+ return xs, None
+
+
+def tuple_from_iterable(_keys, xs):
+ return tuple(xs)
+
+
+def list_to_iterable(xs):
+ return tuple(xs), None
+
+
+def list_from_iterable(_keys, xs):
+ return list(xs)
+
+
+def dict_to_iterable(xs):
+ keys = tuple(sorted(xs.keys()))
+ return tuple(map(xs.get, keys)), keys
+
+
+def dict_from_iterable(keys, xs):
+ return dict(safe_zip(keys, xs))
+
+
+def ordered_dict_from_iterable(keys, xs):
+ return collections.OrderedDict(safe_zip(keys, xs))
+
+
+def default_dict_to_iterable(xs):
+ return (tuple(xs.values()), (xs.default_factory, tuple(xs.keys())))
+
+
+def default_dict_from_iterable(keys, xs):
+ return collections.defaultdict(keys[0], safe_zip(keys[1], xs))
+
+
+def none_to_iterable(_xs):
+ return (), None
+
+
+def none_from_iterable(_keys, _xs):
+ return None
+
+
+register_pytree_node(tuple, tuple_to_iterable, tuple_from_iterable)
+register_pytree_node(list, list_to_iterable, list_from_iterable)
+register_pytree_node(dict, dict_to_iterable, dict_from_iterable)
+register_pytree_node(collections.OrderedDict, dict_to_iterable, ordered_dict_from_iterable)
+register_pytree_node(collections.defaultdict, default_dict_to_iterable, default_dict_from_iterable)
+register_pytree_node(type(None), none_to_iterable, none_from_iterable)
diff --git a/docs/src/lib/util.py b/docs/src/lib/util.py
new file mode 100644
index 0000000..abcd3fa
--- /dev/null
+++ b/docs/src/lib/util.py
@@ -0,0 +1,276 @@
+from typing import Dict, Optional
+
+import torch as th
+from torch import nn
+from torch.nn import functional as F
+
+import lib.torch_util as tu
+from lib.masked_attention import MaskedAttention
+from lib.minecraft_util import store_args
+from lib.tree_util import tree_map
+
+
+def get_module_log_keys_recursive(m: nn.Module):
+ """Recursively get all keys that a module and its children want to log."""
+ keys = []
+ if hasattr(m, "get_log_keys"):
+ keys += m.get_log_keys()
+ for c in m.children():
+ keys += get_module_log_keys_recursive(c)
+ return keys
+
+
+class FanInInitReLULayer(nn.Module):
+ """Implements a slightly modified init that correctly produces std 1 outputs given ReLU activation
+ :param inchan: number of input channels
+ :param outchan: number of output channels
+ :param layer_args: positional layer args
+ :param layer_type: options are "linear" (dense layer), "conv" (2D Convolution), "conv3d" (3D convolution)
+ :param init_scale: multiplier on initial weights
+ :param batch_norm: use batch norm after the layer (for 2D data)
+ :param group_norm_groups: if not None, use group norm with this many groups after the layer. Group norm with
+ 1 group is equivalent to layer norm for 2D data.
+ :param layer_norm: use layernorm after the layer (for 1D data)
+ :param layer_kwargs: keyword arguments for the layer
+ """
+
+ @store_args
+ def __init__(
+ self,
+ inchan: int,
+ outchan: int,
+ *layer_args,
+ layer_type: str = "conv",
+ init_scale: int = 1,
+ batch_norm: bool = False,
+ batch_norm_kwargs: Dict = {},
+ group_norm_groups: Optional[int] = None,
+ layer_norm: bool = False,
+ use_activation=True,
+ log_scope: Optional[str] = None,
+ **layer_kwargs,
+ ):
+ super().__init__()
+
+ # Normalization
+ self.norm = None
+ if batch_norm:
+ self.norm = nn.BatchNorm2d(inchan, **batch_norm_kwargs)
+ elif group_norm_groups is not None:
+ self.norm = nn.GroupNorm(group_norm_groups, inchan)
+ elif layer_norm:
+ self.norm = nn.LayerNorm(inchan)
+
+ layer = dict(conv=nn.Conv2d, conv3d=nn.Conv3d, linear=nn.Linear)[layer_type]
+ self.layer = layer(inchan, outchan, bias=self.norm is None, *layer_args, **layer_kwargs)
+
+ # Init Weights (Fan-In)
+ self.layer.weight.data *= init_scale / self.layer.weight.norm(
+ dim=tuple(range(1, self.layer.weight.data.ndim)), p=2, keepdim=True
+ )
+ # Init Bias
+ if self.layer.bias is not None:
+ self.layer.bias.data *= 0
+
+ def forward(self, x):
+ """Norm after the activation. Experimented with this for both IAM and BC and it was slightly better."""
+ if self.norm is not None:
+ x = self.norm(x)
+ x = self.layer(x)
+ if self.use_activation:
+ x = F.relu(x, inplace=True)
+ return x
+
+ def get_log_keys(self):
+ return [
+ f"activation_mean/{self.log_scope}",
+ f"activation_std/{self.log_scope}",
+ ]
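+
+# Illustrative usage sketch (sizes chosen for the example):
+#   conv = FanInInitReLULayer(3, 32, kernel_size=3, padding=1)            # 2D conv
+#   dense = FanInInitReLULayer(256, 512, layer_type="linear", layer_norm=True)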
+
+
+class ResidualRecurrentBlocks(nn.Module):
+ @store_args
+ def __init__(
+ self,
+ n_block=2,
+ recurrence_type="multi_layer_lstm",
+ is_residual=True,
+ **block_kwargs,
+ ):
+ super().__init__()
+ init_scale = n_block ** -0.5 if is_residual else 1
+ self.blocks = nn.ModuleList(
+ [
+ ResidualRecurrentBlock(
+ **block_kwargs,
+ recurrence_type=recurrence_type,
+ is_residual=is_residual,
+ init_scale=init_scale,
+ block_number=i,
+ )
+ for i in range(n_block)
+ ]
+ )
+
+ def forward(self, x, first, state):
+ state_out = []
+ assert len(state) == len(
+ self.blocks
+ ), f"Length of state {len(state)} did not match length of blocks {len(self.blocks)}"
+ for block, _s_in in zip(self.blocks, state):
+ x, _s_o = block(x, first, _s_in)
+ state_out.append(_s_o)
+ return x, state_out
+
+ def initial_state(self, batchsize):
+ if "lstm" in self.recurrence_type:
+ return [None for b in self.blocks]
+ else:
+ return [b.r.initial_state(batchsize) for b in self.blocks]
+
+
+class ResidualRecurrentBlock(nn.Module):
+ @store_args
+ def __init__(
+ self,
+ hidsize,
+ timesteps,
+ init_scale=1,
+ recurrence_type="multi_layer_lstm",
+ is_residual=True,
+ use_pointwise_layer=True,
+ pointwise_ratio=4,
+ pointwise_use_activation=False,
+ attention_heads=8,
+ attention_memory_size=2048,
+ attention_mask_style="clipped_causal",
+ log_scope="resblock",
+ block_number=0,
+ ):
+ super().__init__()
+ self.log_scope = f"{log_scope}{block_number}"
+ s = init_scale
+ if use_pointwise_layer:
+ if is_residual:
+ s *= 2 ** -0.5 # second residual
+ self.mlp0 = FanInInitReLULayer(
+ hidsize,
+ hidsize * pointwise_ratio,
+ init_scale=1,
+ layer_type="linear",
+ layer_norm=True,
+ log_scope=self.log_scope + "/ptwise_mlp0",
+ )
+ self.mlp1 = FanInInitReLULayer(
+ hidsize * pointwise_ratio,
+ hidsize,
+ init_scale=s,
+ layer_type="linear",
+ use_activation=pointwise_use_activation,
+ log_scope=self.log_scope + "/ptwise_mlp1",
+ )
+
+ self.pre_r_ln = nn.LayerNorm(hidsize)
+ if recurrence_type in ["multi_layer_lstm", "multi_layer_bilstm"]:
+ self.r = nn.LSTM(hidsize, hidsize, batch_first=True)
+ nn.init.normal_(self.r.weight_hh_l0, std=s * (self.r.weight_hh_l0.shape[0] ** -0.5))
+ nn.init.normal_(self.r.weight_ih_l0, std=s * (self.r.weight_ih_l0.shape[0] ** -0.5))
+ self.r.bias_hh_l0.data *= 0
+ self.r.bias_ih_l0.data *= 0
+ elif recurrence_type == "transformer":
+ self.r = MaskedAttention(
+ input_size=hidsize,
+ timesteps=timesteps,
+ memory_size=attention_memory_size,
+ heads=attention_heads,
+ init_scale=s,
+ norm="none",
+ log_scope=log_scope + "/sa",
+ use_muP_factor=True,
+ mask=attention_mask_style,
+ )
+
+ def forward(self, x, first, state):
+ residual = x
+ x = self.pre_r_ln(x)
+ x, state_out = recurrent_forward(
+ self.r,
+ x,
+ first,
+ state,
+ reverse_lstm=self.recurrence_type == "multi_layer_bilstm" and (self.block_number + 1) % 2 == 0,
+ )
+ if self.is_residual and "lstm" in self.recurrence_type: # Transformer already residual.
+ x = x + residual
+ if self.use_pointwise_layer:
+ # Residual MLP
+ residual = x
+ x = self.mlp1(self.mlp0(x))
+ if self.is_residual:
+ x = x + residual
+ return x, state_out
+
+
+def recurrent_forward(module, x, first, state, reverse_lstm=False):
+ if isinstance(module, nn.LSTM):
+ if state is not None:
+ # In case recurrent models do not accept a "first" argument we zero out the hidden state here
+ mask = 1 - first[:, 0, None, None].to(th.float)
+ state = tree_map(lambda _s: _s * mask, state)
+ state = tree_map(lambda _s: _s.transpose(0, 1), state) # NL, B, H
+ if reverse_lstm:
+ x = th.flip(x, [1])
+ x, state_out = module(x, state)
+ if reverse_lstm:
+ x = th.flip(x, [1])
+ state_out = tree_map(lambda _s: _s.transpose(0, 1), state_out) # B, NL, H
+ return x, state_out
+ else:
+ return module(x, first, state)
+
+
+def _banded_repeat(x, t):
+ """
+ Repeats x with a shift.
+ For example (ignoring the batch dimension):
+
+ _banded_repeat([A B C D E], 4)
+ =
+ [D E 0 0 0]
+ [C D E 0 0]
+ [B C D E 0]
+ [A B C D E]
+ """
+ b, T = x.shape
+ x = th.cat([x, x.new_zeros(b, t - 1)], dim=1)
+ result = x.unfold(1, T, 1).flip(1)
+ return result
+
+
+def bandify(b_nd, t, T):
+ """
+ b_nd -> D_ntT, where
+ "n" indexes over basis functions
+ "d" indexes over time differences
+ "t" indexes over output time
+ "T" indexes over input time
+ only t >= T is nonzero
+ D_ntT[n, t, T] = b_nd[n, t - T]
+ """
+ nbasis, bandsize = b_nd.shape
+ b_nd = b_nd[:, th.arange(bandsize - 1, -1, -1)]
+ if bandsize >= T:
+ b_nT = b_nd[:, -T:]
+ else:
+ b_nT = th.cat([b_nd.new_zeros(nbasis, T - bandsize), b_nd], dim=1)
+ D_tnT = _banded_repeat(b_nT, t)
+ return D_tnT
+
+
+def get_norm(name, d, dtype=th.float32):
+ if name == "none":
+ return lambda x: x
+ elif name == "layer":
+ return tu.LayerNorm(d, dtype=dtype)
+ else:
+ raise NotImplementedError(name)
diff --git a/docs/src/lib/xf.py b/docs/src/lib/xf.py
new file mode 100644
index 0000000..095a98f
--- /dev/null
+++ b/docs/src/lib/xf.py
@@ -0,0 +1,479 @@
+"""
+Implementation of transformer and reshaping-based sparse transformer
+"""
+import functools
+import math
+
+import torch as th
+from torch import nn
+from torch.nn import functional as F
+
+from lib import misc, mlp
+from lib import torch_util as tu
+from lib import util
+
+SENTINEL = 0.1337
+
+
+def attention(
+ Q_bte,
+ K_bTe,
+ V_bTe,
+ dtype,
+ mask=True,
+ extra_btT=None,
+ maxlen=None,
+ check_sentinel=False,
+ use_muP_factor=False,
+):
+ """
+ performs softmax(Q*K)*V operation
+
+ t : output (write) time axis, possibly size=1 for just the last timestep
+ T : input (read) time axis
+ t < T is OK
+
+ 'check_sentinel' is used when you want to make it impossible to attend to certain keys.
+ All keys where every value is equal to the constant SENTINEL will be ignored.
+ Currently this is only used by StridedAttn.
+ """
+ assert Q_bte.dtype == K_bTe.dtype == dtype, f"{Q_bte.dtype}, {K_bTe.dtype}, {dtype} must all match"
+ e = Q_bte.shape[2]
+ if check_sentinel:
+ invalid = (K_bTe == SENTINEL).int().sum(dim=-1) == e
+ invalid = misc.reshape(invalid, "b, T", "b, 1, T")
+ if isinstance(mask, th.Tensor):
+ bias = (~mask).float() * -1e9
+ elif mask:
+ bias = get_attn_bias_cached(Q_bte.shape[1], K_bTe.shape[1], maxlen=maxlen, device=Q_bte.device, dtype=th.float32)
+ else:
+ bias = Q_bte.new_zeros((), dtype=th.float32)
+ if extra_btT is not None:
+ bias = bias + extra_btT
+ # Equivalent to bias + (1 / math.sqrt(e)) * th.einsum("bte,bpe->btp", Q_bte, K_bTe)
+ # but faster:
+ logit_btT = th.baddbmm(
+ bias,
+ Q_bte.float(),
+ K_bTe.float().transpose(-1, -2),
+ alpha=(1 / e) if use_muP_factor else (1 / math.sqrt(e)),
+ )
+ if check_sentinel:
+ logit_btT = logit_btT - 1e9 * invalid.float()
+ W_btT = th.softmax(logit_btT, dim=2).to(dtype)
+ if callable(V_bTe):
+ # This is used by the sharded video model to defer waiting on
+ # the broadcast of the values until they're needed
+ V_bTe = V_bTe()
+ # th.einsum only lets you use lowercase letters, so 'p' for 'past'
+ # means 'T'
+ A_bte = th.einsum("btp,bpe->bte", W_btT, V_bTe)
+ return A_bte
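+
+# Shape sketch for attention() (illustrative): with batch*heads B, query length t,
+# key/value length T, and per-head size e,
+#   Q_bte: (B, t, e), K_bTe: (B, T, e), V_bTe: (B, T, e)  ->  A_bte: (B, t, e)
+# Instead of mask=True, a boolean tensor broadcastable to (B, t, T) can be passed
+# to supply a precomputed attention mask (True meaning "may attend").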
+
+
+class Attn:
+ """
+ Defines an attention mechanism
+ All the mechanisms here can be defined by two operations:
+ 1. preprocessing Q,K,V,R[=relative attention query]
+ to move axes from embedding dimension to
+ batch dimension, and possibly doing shifts.
+ 2. postprocessing the final result to move axes back to embedding
+ axis.
+ """
+
+ def __init__(self, mask, maxlen):
+ self.mask = mask
+ self.maxlen = maxlen
+
+ def preproc_qkv(self, Q_bte, K_bte, V_bte):
+ raise NotImplementedError
+
+ def preproc_r(self, R_btn):
+ raise NotImplementedError
+
+
+def split_heads(x_bte, h):
+ b, t, e = x_bte.shape
+ assert e % h == 0, "Embsize must be divisible by number of heads"
+ q = e // h
+ x_bthq = x_bte.reshape((b, t, h, q))
+ x_bhtq = misc.transpose(x_bthq, "bthq", "bhtq")
+ x_Btq = x_bhtq.reshape((b * h, t, q))
+ return x_Btq
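+
+# Illustrative example: split_heads folds the head dimension into the batch axis.
+#   x_bte with shape (2, 5, 8) and h=4  ->  x_Btq with shape (8, 5, 2)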
+
+
+class All2All(Attn):
+ def __init__(self, nhead, maxlen, mask=True, head_dim=None):
+ super().__init__(mask=mask, maxlen=maxlen)
+ assert (nhead is None) != (head_dim is None), "exactly one of nhead and head_dim must be specified"
+ self.h = nhead
+ self.head_dim = head_dim
+
+ def preproc_qkv(self, *xs):
+ q = xs[0].shape[-1]
+ for x in xs:
+ assert x.shape[-1] == q, "embedding dimensions do not match"
+ h = self.h or misc.exact_div(q, self.head_dim)
+ postproc = functools.partial(self.postproc_a, h=h)
+ return (postproc, *tuple(split_heads(x, h) for x in xs))
+
+ def preproc_r(self, R_btn):
+ _, ret = self.preproc_qkv(R_btn)
+ return ret
+
+ def postproc_a(self, A_Btq, h):
+ B, t, q = A_Btq.shape
+ b = B // h
+ A_bhtq = A_Btq.reshape((b, h, t, q))
+ A_bthq = misc.transpose(A_bhtq, "bhtq", "bthq")
+ A_bte = A_bthq.reshape((b, t, h * q))
+ return A_bte
+
+
+def _required_padding(dim, target_div):
+ if dim % target_div == 0:
+ return 0
+ else:
+ return target_div - dim % target_div
+
+
+class StridedAttn(Attn):
+ def __init__(self, nhead, stride, maxlen, mask=True):
+ super().__init__(mask=mask, maxlen=maxlen)
+ self.h = nhead
+ self.stride = stride
+
+ def _preproc(self, x, name, Q_t=None, Q_pad=None):
+ x, undo = misc.reshape_undo(x, "b, t*stride, e", "b, 1, t, stride*e", stride=self.stride)
+ if name == "Q":
+ Q_pad = _required_padding(x.shape[2], self.maxlen)
+ original_t = x.shape[2]
+ x = F.pad(x, (0, 0, 0, Q_pad), value=SENTINEL)
+ undo = misc.compose_undo(undo, lambda x: x[:, :, :original_t])
+ if name == "Q":
+ Q_t = x.shape[2]
+ assert Q_t % self.maxlen == 0, f"{Q_t} % {self.maxlen} != 0"
+ else:
+ required_len = Q_t + self.maxlen
+ if x.shape[2] < required_len:
+ x = F.pad(x, (0, 0, required_len - x.shape[2], 0), value=SENTINEL)
+ assert x.shape[2] >= required_len
+ back = x[:, :, -Q_t - self.maxlen : -self.maxlen]
+ front = x[:, :, -Q_t:]
+ x = th.cat([back, front], dim=1)
+ _, _, t, _ = x.shape
+ assert t == Q_t, f"{t} != {Q_t}"
+ x, undo = misc.reshape_undo(
+ x,
+ "b, pad_shift, t*maxlen, stride*h*q",
+ "b, pad_shift, t, maxlen, stride, h, q",
+ maxlen=self.maxlen,
+ h=self.h,
+ stride=self.stride,
+ undo=undo,
+ )
+ x, undo = misc.transpose_undo(x, "bptmshq", "bthspmq", undo=undo)
+ x, undo = misc.reshape_undo(
+ x,
+ "b, t, h, stride, pad_shift, maxlen, q",
+ "b*t*h*stride, pad_shift*maxlen, q",
+ undo=undo,
+ )
+ if name == "Q":
+ return x, undo, Q_t, Q_pad
+ else:
+ return x
+
+ def preproc_qkv(self, Q_bte, K_bte, V_bte):
+ pad = _required_padding(Q_bte.shape[1], self.stride)
+ if pad:
+ Q_bte = F.pad(Q_bte, (0, 0, 0, pad), value=SENTINEL)
+ K_bte = F.pad(K_bte, (0, 0, 0, pad), value=SENTINEL) if K_bte is not None else None
+ V_bte = F.pad(V_bte, (0, 0, 0, pad), value=SENTINEL) if V_bte is not None else None
+ undo = lambda x, pad=pad: x[:, :-pad]
+ else:
+ undo = None
+ if K_bte is not None:
+ pad = _required_padding(K_bte.shape[1], self.stride)
+ if pad:
+ K_bte = F.pad(K_bte, (0, 0, pad, 0), value=SENTINEL)
+ V_bte = F.pad(V_bte, (0, 0, pad, 0), value=SENTINEL)
+ assert Q_bte.shape[1] % self.stride == 0
+ assert K_bte is None or K_bte.shape[1] % self.stride == 0
+ assert V_bte is None or V_bte.shape[1] % self.stride == 0
+ Q, postproc, Q_t, Q_pad = self._preproc(Q_bte, "Q")
+ postproc = misc.compose_undo(undo, postproc)
+ return (
+ postproc,
+ Q,
+ self._preproc(K_bte, "K", Q_t=Q_t, Q_pad=Q_pad) if K_bte is not None else None,
+ self._preproc(V_bte, "V", Q_t=Q_t, Q_pad=Q_pad) if V_bte is not None else None,
+ )
+
+ def preproc_r(self, R_bte):
+ _, R, _, _ = self.preproc_qkv(R_bte, None, None)
+ return R
+
+
+Q_SCALE = 0.1
+K_SCALE = 0.2
+V_SCALE = 1.0
+PROJ_SCALE = 1.0
+MLP0_SCALE = 1.0
+MLP1_SCALE = 1.0
+R_SCALE = 0.1
+B_SCALE = 0.2
+
+
+class AttentionLayerBase(nn.Module):
+ def __init__(
+ self,
+ *,
+ attn,
+ scale,
+ x_size,
+ c_size,
+ qk_size,
+ v_size,
+ dtype,
+ relattn=False,
+ seqlens=None,
+ separate=False,
+ ):
+ super().__init__()
+ dtype = tu.parse_dtype(dtype)
+ self.attn = attn
+ self.x_size = x_size
+ self.c_size = c_size
+ s = math.sqrt(scale)
+ separgs = dict(seqlens=seqlens, separate=separate)
+ self.q_layer = MultiscaleLinear(x_size, qk_size, name="q", scale=Q_SCALE, dtype=dtype, **separgs)
+ self.k_layer = MultiscaleLinear(c_size, qk_size, name="k", scale=K_SCALE, bias=False, dtype=dtype, **separgs)
+ self.v_layer = MultiscaleLinear(c_size, v_size, name="v", scale=V_SCALE * s, bias=False, dtype=dtype, **separgs)
+ self.proj_layer = MultiscaleLinear(v_size, x_size, name="proj", scale=PROJ_SCALE * s, dtype=dtype, **separgs)
+ self.relattn = relattn
+ maxlen = attn.maxlen
+ assert maxlen > 0 or not attn.mask
+ if self.relattn:
+ nbasis = 10
+ self.r_layer = tu.NormedLinear(x_size, nbasis * attn.h, scale=R_SCALE, dtype=dtype)
+ self.b_nd = nn.Parameter(th.randn(nbasis, maxlen) * B_SCALE)
+ self.maxlen = maxlen
+ self.dtype = dtype
+
+ def relattn_logits(self, X_bte, T):
+ R_btn = self.r_layer(X_bte).float()
+ R_btn = self.attn.preproc_r(R_btn)
+ t = R_btn.shape[1]
+ D_ntT = util.bandify(self.b_nd, t, T)
+ extra_btT = th.einsum("btn,ntp->btp", R_btn, D_ntT)
+ return extra_btT
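+
+ # Note (added for clarity): extra_btT is an additive bias on the attention
+ # logits; it is passed as `extra_btT` to attention() in
+ # SelfAttentionLayer.residual below. r_layer projects each query timestep
+ # onto nbasis learned coefficients per head, and util.bandify (judging from
+ # its name and usage) lays b_nd out over relative offsets, so the einsum
+ # combines query content with learned relative-position terms.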
+
+
+def quick_gelu(x):
+ return x * th.sigmoid(1.702 * x)
+
+
+def act(actname, x):
+ if actname == "relu":
+ return F.relu(x)
+ elif actname == "gelu":
+ return quick_gelu(x)
+ elif actname == "none":
+ return x
+ else:
+ raise NotImplementedError(actname)
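+
+# Dispatch sketch (added for clarity): act("relu", x) -> F.relu(x),
+# act("gelu", x) -> quick_gelu(x) == x * sigmoid(1.702 * x) (the sigmoid-based
+# GELU approximation), and act("none", x) returns x unchanged.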
+
+
+class SelfAttentionLayer(AttentionLayerBase):
+ """
+ Residual attention layer that takes a single tensor x and has it attend to itself
+ Has the form
+ output = x + f(x)
+ """
+
+ def __init__(
+ self,
+ x_size,
+ attn,
+ scale,
+ dtype="float32",
+ norm="layer",
+ cache_keep_len=None,
+ relattn=False,
+ log_scope="sa",
+ use_muP_factor=False,
+ **kwargs,
+ ):
+ super().__init__(
+ x_size=x_size,
+ c_size=x_size,
+ qk_size=x_size,
+ v_size=x_size,
+ attn=attn,
+ scale=scale,
+ relattn=relattn,
+ dtype=dtype,
+ **kwargs,
+ )
+ self.ln_x = util.get_norm(norm, x_size, dtype=dtype)
+ if cache_keep_len is None:
+ if hasattr(attn, "cache_keep_len"):
+ cache_keep_len = attn.cache_keep_len
+ else:
+ if isinstance(attn, StridedAttn):
+ stride = attn.stride
+ else:
+ stride = 1
+ cache_keep_len = stride * attn.maxlen
+ self.cache_keep_len = cache_keep_len
+ self.log_scope = log_scope
+ self.use_muP_factor = use_muP_factor
+
+ def residual(self, X_bte, state):
+ X_bte = self.ln_x(X_bte)
+ Q_bte = self.q_layer(X_bte)
+ K_bte = self.k_layer(X_bte)
+ V_bte = self.v_layer(X_bte)
+ if state:
+ state, K_bte, V_bte = self.update_state(state, K_bte, V_bte)
+ postproc_closure, Q_bte, K_bte, V_bte = self.attn.preproc_qkv(Q_bte, K_bte, V_bte)
+ extra_btT = self.relattn_logits(X_bte, K_bte.shape[1]) if self.relattn else None
+ A_bte = attention(
+ Q_bte,
+ K_bte,
+ V_bte,
+ mask=self.attn.mask,
+ extra_btT=extra_btT,
+ maxlen=self.maxlen,
+ dtype=self.dtype,
+ check_sentinel=isinstance(self.attn, StridedAttn),
+ use_muP_factor=self.use_muP_factor,
+ )
+ A_bte = postproc_closure(A_bte)
+ Aproj_bte = self.proj_layer(A_bte)
+ return Aproj_bte, state
+
+ def forward(self, X_bte, state):
+ R_bte, state = self.residual(X_bte, state)
+ return X_bte + R_bte, state
+
+ def stateless_forward(self, X_bte):
+ out_bte, _state = self.forward(X_bte, None)
+ return out_bte
+
+ def update_state(self, state, K_bte, V_bte):
+ def append(prev, new):
+ """
+ Given `prev` keys from cache, and `new` keys,
+ returns (cache, full), where
+ - cache goes into the output state, length chosen so that on the
+ next timestep, there are enough cached timesteps to get the full
+ context of length self.maxlen.
+ - full is used for the current forward pass, with length chosen so
+ that the first timestep new[:, 0] gets to see a context of
+ self.maxlen.
+ """
+ tprev = prev.shape[1]
+ startfull = max(tprev - self.cache_keep_len, 0)
+ full = th.cat([prev[:, startfull:], new], dim=1)
+ outstate = full[:, max(full.shape[1] - (self.cache_keep_len), 0) :]
+ # To see that the preceding slicing is correct, consider the case
+ # that maxlen==1. Then `full` only consists of `new`, and
+ # `outstate` is empty
+ return outstate, full
+
+ instate_K, instate_V = state
+ outstate_K, K_bte = append(instate_K, K_bte)
+ outstate_V, V_bte = append(instate_V, V_bte)
+ assert outstate_K.shape[-2] <= self.cache_keep_len
+ return (outstate_K, outstate_V), K_bte, V_bte
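+
+ # Worked example for update_state (added for clarity, not in the original
+ # file): with cache_keep_len == 3, a cached prev of length 5 and new keys of
+ # length 2, append() keeps prev[:, 2:], so full has length 3 + 2 == 5 and
+ # outstate is the last 3 timesteps of full, which becomes the cache passed
+ # in on the next call.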
+
+ def initial_state(self, batchsize, initial_T=0):
+ return (
+ tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),
+ tu.zeros((batchsize, initial_T, self.x_size), dtype=self.dtype),
+ )
+
+ def empty_state(self):
+ return None
+
+
+class PointwiseLayer(nn.Module):
+ """
+ Residual MLP applied at each timestep
+ """
+
+ def __init__(self, x_size, scale, dtype, norm, actname="relu", mlp_ratio=2):
+ super().__init__()
+ s = math.sqrt(scale)
+ self.ln = util.get_norm(norm, x_size, dtype=dtype)
+ self.mlp = mlp.MLP(
+ insize=x_size,
+ nhidlayer=1,
+ outsize=x_size,
+ hidsize=int(x_size * mlp_ratio),
+ hidactiv=functools.partial(act, actname),
+ dtype=dtype,
+ )
+ self.mlp.layers[0].weight.data *= MLP0_SCALE * s
+ self.mlp.layers[1].weight.data *= MLP1_SCALE * s
+
+ def residual(self, x):
+ x = self.ln(x)
+ x = self.mlp(x)
+ return x
+
+ def forward(self, x):
+ return x + self.residual(x)
+
+
+def _is_separate(sep, name):
+ if isinstance(sep, bool):
+ return sep
+ assert isinstance(sep, set)
+ if name in sep:
+ sep.remove(name)
+ return True
+ else:
+ return False
+
+
+def make_maybe_multiscale(make_fn, *args, seqlens, separate, name, **kwargs):
+ """
+ This function either creates one instance of a module or creates
+ a separate instance of the module for each resolution of the image,
+ determined by the `separate` parameter. We create separate modules
+ if `separate` is True or if `separate` is a set containing `name`.
+ """
+ if _is_separate(separate, name):
+ modules = [make_fn(*args, **kwargs) for _ in seqlens]
+ return SplitCallJoin(modules, seqlens)
+ else:
+ return make_fn(*args, **kwargs)
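+
+# Usage sketch (added for clarity; mirrors how MultiscaleLinear is called in
+# AttentionLayerBase above): with two image resolutions of 256 and 64 tokens,
+#   make_maybe_multiscale(tu.NormedLinear, x_size, qk_size, name="q",
+#                         scale=Q_SCALE, dtype=dtype,
+#                         seqlens=[256, 64], separate=True)
+# builds one NormedLinear per resolution and wraps them in SplitCallJoin,
+# whereas separate=False (or a set not containing "q") returns one shared layer.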
+
+
+class SplitCallJoin(nn.Module):
+ def __init__(self, mods, seqlens):
+ super().__init__()
+ self.mods = nn.ModuleList(mods)
+ self.seqlens = seqlens
+
+ def forward(self, x):
+ tl = sum(self.seqlens)
+ x, undo = misc.reshape_undo(x, "..., z*tl, e", "..., z, tl, e", tl=tl)
+ x = list(th.split(x, self.seqlens, dim=-2))
+ new_x = []
+ for x, mod in misc.safezip(x, self.mods):
+ x, this_undo = misc.reshape_undo(x, "..., z, l, e", "..., z*l, e")
+ x = mod(x)
+ x = this_undo(x)
+ new_x.append(x)
+ x = th.cat(new_x, dim=-2)
+ x = undo(x)
+ return x
+
+
+MultiscaleLinear = functools.partial(make_maybe_multiscale, tu.NormedLinear)
+MultiscalePointwise = functools.partial(make_maybe_multiscale, PointwiseLayer)
diff --git a/docs/src/requirements.txt b/docs/src/requirements.txt
new file mode 100644
index 0000000..c814c19
--- /dev/null
+++ b/docs/src/requirements.txt
@@ -0,0 +1,4 @@
+torch==1.9.0
+gym3
+attrs
+opencv-python
diff --git a/docs/src/run_agent.py b/docs/src/run_agent.py
new file mode 100644
index 0000000..fe9238e
--- /dev/null
+++ b/docs/src/run_agent.py
@@ -0,0 +1,35 @@
+from argparse import ArgumentParser
+import pickle
+
+from minerl.herobraine.env_specs.human_survival_specs import HumanSurvival
+
+from agent import MineRLAgent, ENV_KWARGS
+
+def main(model, weights):
+ env = HumanSurvival(**ENV_KWARGS).make()
+ print("---Loading model---")
+ agent_parameters = pickle.load(open(model, "rb"))
+ policy_kwargs = agent_parameters["model"]["args"]["net"]["args"]
+ pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
+ pi_head_kwargs["temperature"] = float(pi_head_kwargs["temperature"])
+ agent = MineRLAgent(env, policy_kwargs=policy_kwargs, pi_head_kwargs=pi_head_kwargs)
+ agent.load_weights(weights)
+
+ print("---Launching MineRL enviroment (be patient)---")
+ obs = env.reset()
+
+ while True:
+ minerl_action = agent.get_action(obs)
+ obs, reward, done, info = env.step(minerl_action)
+ env.render()
+
+
+if __name__ == "__main__":
+ parser = ArgumentParser("Run pretrained models on MineRL environment")
+
+ parser.add_argument("--weights", type=str, required=True, help="Path to the '.weights' file to be loaded.")
+ parser.add_argument("--model", type=str, required=True, help="Path to the '.model' file to be loaded.")
+
+ args = parser.parse_args()
+
+ main(args.model, args.weights)
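+
+# Example invocation (added for clarity; the checkpoint file names below are
+# placeholders for a downloaded .model/.weights pair):
+#   python run_agent.py --model foundation-model-1x.model --weights foundation-model-1x.weights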
diff --git a/docs/src/run_inverse_dynamics_model.py b/docs/src/run_inverse_dynamics_model.py
new file mode 100644
index 0000000..a932d92
--- /dev/null
+++ b/docs/src/run_inverse_dynamics_model.py
@@ -0,0 +1,205 @@
+# NOTE: this is _not_ the original code of IDM!
+# As such, while it is close and seems to function well,
+# its performance might be a bit off from what is reported
+# in the paper.
+
+from argparse import ArgumentParser
+import pickle
+import cv2
+import numpy as np
+import json
+import torch as th
+
+from agent import ENV_KWARGS
+from inverse_dynamics_model import IDMAgent
+
+
+KEYBOARD_BUTTON_MAPPING = {
+ "key.keyboard.escape" :"ESC",
+ "key.keyboard.s" :"back",
+ "key.keyboard.q" :"drop",
+ "key.keyboard.w" :"forward",
+ "key.keyboard.1" :"hotbar.1",
+ "key.keyboard.2" :"hotbar.2",
+ "key.keyboard.3" :"hotbar.3",
+ "key.keyboard.4" :"hotbar.4",
+ "key.keyboard.5" :"hotbar.5",
+ "key.keyboard.6" :"hotbar.6",
+ "key.keyboard.7" :"hotbar.7",
+ "key.keyboard.8" :"hotbar.8",
+ "key.keyboard.9" :"hotbar.9",
+ "key.keyboard.e" :"inventory",
+ "key.keyboard.space" :"jump",
+ "key.keyboard.a" :"left",
+ "key.keyboard.d" :"right",
+ "key.keyboard.left.shift" :"sneak",
+ "key.keyboard.left.control" :"sprint",
+ "key.keyboard.f" :"swapHands",
+}
+
+# Template action
+NOOP_ACTION = {
+ "ESC": 0,
+ "back": 0,
+ "drop": 0,
+ "forward": 0,
+ "hotbar.1": 0,
+ "hotbar.2": 0,
+ "hotbar.3": 0,
+ "hotbar.4": 0,
+ "hotbar.5": 0,
+ "hotbar.6": 0,
+ "hotbar.7": 0,
+ "hotbar.8": 0,
+ "hotbar.9": 0,
+ "inventory": 0,
+ "jump": 0,
+ "left": 0,
+ "right": 0,
+ "sneak": 0,
+ "sprint": 0,
+ "swapHands": 0,
+ "camera": np.array([0, 0]),
+ "attack": 0,
+ "use": 0,
+ "pickItem": 0,
+}
+
+MESSAGE = """
+This script will take a video, predict actions for its frames,
+and show them in a cv2 window.
+
+Press any key in the window to proceed to the next frame.
+"""
+
+# Matches a number in the MineRL Java code regarding sensitivity
+# This is for mapping from recorded sensitivity to the one used in the model
+CAMERA_SCALER = 360.0 / 2400.0
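+
+# Example (added for clarity): a recorded mouse delta of dx == 2400 maps to
+# 360 degrees of camera movement under this scaling (2400 * 360/2400 == 360).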
+
+
+def json_action_to_env_action(json_action):
+ """
+ Converts a json action into a MineRL action.
+ Returns (minerl_action, is_null_action)
+ """
+ # This might be slow...
+ env_action = NOOP_ACTION.copy()
+ # As a safeguard, re-create the camera array so we do not mutate the shared
+ # NOOP_ACTION template (dict.copy() above is shallow)
+ env_action["camera"] = np.array([0, 0])
+
+ is_null_action = True
+ keyboard_keys = json_action["keyboard"]["keys"]
+ for key in keyboard_keys:
+ # You can have keys that we do not use, so just skip them
+ # NOTE in original training code, ESC was removed and replaced with
+ # "inventory" action if GUI was open.
+ # Not doing it here, as BASALT uses ESC to quit the game.
+ if key in KEYBOARD_BUTTON_MAPPING:
+ env_action[KEYBOARD_BUTTON_MAPPING[key]] = 1
+ is_null_action = False
+
+ mouse = json_action["mouse"]
+ camera_action = env_action["camera"]
+ camera_action[0] = mouse["dy"] * CAMERA_SCALER
+ camera_action[1] = mouse["dx"] * CAMERA_SCALER
+
+ if mouse["dx"] != 0 or mouse["dy"] != 0:
+ is_null_action = False
+ else:
+ if abs(camera_action[0]) > 180:
+ camera_action[0] = 0
+ if abs(camera_action[1]) > 180:
+ camera_action[1] = 0
+
+ mouse_buttons = mouse["buttons"]
+ if 0 in mouse_buttons:
+ env_action["attack"] = 1
+ is_null_action = False
+ if 1 in mouse_buttons:
+ env_action["use"] = 1
+ is_null_action = False
+ if 2 in mouse_buttons:
+ env_action["pickItem"] = 1
+ is_null_action = False
+
+ return env_action, is_null_action
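+
+# Usage sketch (added for clarity; field names follow the parsing above):
+#   json_action = {"keyboard": {"keys": ["key.keyboard.w"]},
+#                  "mouse": {"dx": 10.0, "dy": 0.0, "buttons": []}}
+#   env_action, is_null = json_action_to_env_action(json_action)
+#   # env_action["forward"] == 1, env_action["camera"] == [0.0, 1.5], is_null is False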
+
+
+def main(model, weights, video_path, json_path, n_batches, n_frames):
+ print(MESSAGE)
+ agent_parameters = pickle.load(open(model, "rb"))
+ net_kwargs = agent_parameters["model"]["args"]["net"]["args"]
+ pi_head_kwargs = agent_parameters["model"]["args"]["pi_head_opts"]
+ pi_head_kwargs["temperature"] = float(pi_head_kwargs["temperature"])
+ agent = IDMAgent(idm_net_kwargs=net_kwargs, pi_head_kwargs=pi_head_kwargs)
+ agent.load_weights(weights)
+
+ required_resolution = ENV_KWARGS["resolution"]
+ cap = cv2.VideoCapture(video_path)
+
+ json_index = 0
+ with open(json_path) as json_file:
+ json_lines = json_file.readlines()
+ json_data = "[" + ",".join(json_lines) + "]"
+ json_data = json.loads(json_data)
+
+ for _ in range(n_batches):
+ th.cuda.empty_cache()
+ print("=== Loading up frames ===")
+ frames = []
+ recorded_actions = []
+ for _ in range(n_frames):
+ ret, frame = cap.read()
+ if not ret:
+ break
+ assert frame.shape[0] == required_resolution[1] and frame.shape[1] == required_resolution[0], "Video must be of resolution {}".format(required_resolution)
+ # BGR -> RGB
+ frames.append(frame[..., ::-1])
+ env_action, _ = json_action_to_env_action(json_data[json_index])
+ recorded_actions.append(env_action)
+ json_index += 1
+ frames = np.stack(frames)
+ print("=== Predicting actions ===")
+ predicted_actions = agent.predict_actions(frames)
+
+ for i in range(n_frames):
+ frame = frames[i]
+ recorded_action = recorded_actions[i]
+ cv2.putText(
+ frame,
+ f"name: prediction (true)",
+ (10, 10),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.4,
+ (255, 255, 255),
+ 1
+ )
+ for y, (action_name, action_array) in enumerate(predicted_actions.items()):
+ current_prediction = action_array[0, i]
+ cv2.putText(
+ frame,
+ f"{action_name}: {current_prediction} ({recorded_action[action_name]})",
+ (10, 25 + y * 12),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.35,
+ (255, 255, 255),
+ 1
+ )
+ # RGB -> BGR again...
+ cv2.imshow("MineRL IDM model predictions", frame[..., ::-1])
+ cv2.waitKey(0)
+ cv2.destroyAllWindows()
+
+if __name__ == "__main__":
+ parser = ArgumentParser("Run IDM on MineRL recordings.")
+
+ parser.add_argument("--weights", type=str, required=True, help="Path to the '.weights' file to be loaded.")
+ parser.add_argument("--model", type=str, required=True, help="Path to the '.model' file to be loaded.")
+ parser.add_argument("--video-path", type=str, required=True, help="Path to a .mp4 file (Minecraft recording).")
+ parser.add_argument("--jsonl-path", type=str, required=True, help="Path to a .jsonl file (Minecraft recording).")
+ parser.add_argument("--n-frames", type=int, default=128, help="Number of frames to process at a time.")
+ parser.add_argument("--n-batches", type=int, default=10, help="Number of batches (n-frames) to process for visualization.")
+
+ args = parser.parse_args()
+
+ main(args.model, args.weights, args.video_path, args.jsonl_path, args.n_batches, args.n_frames)
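+
+# Example invocation (added for clarity; file names are placeholders for a
+# downloaded IDM checkpoint and a contractor recording):
+#   python run_inverse_dynamics_model.py --model 4x_idm.model --weights 4x_idm.weights \
+#       --video-path recording.mp4 --jsonl-path recording.jsonl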
diff --git a/docs/tree.html b/docs/tree.html
new file mode 100644
index 0000000..ca18e30
--- /dev/null
+++ b/docs/tree.html
@@ -0,0 +1,150 @@
+<!-- docs/tree.html (generated project-structure page; markup omitted) -->
+Project structure of: openai/Video-Pre-Training
+Video-Pre-Training Minecraft AI training, competition, resources, and models.
\ No newline at end of file