From 4c35e871c4665543792f4dfc97c30762457baffc Mon Sep 17 00:00:00 2001
From: Ann Zhang <az275@cornell.edu>
Date: Tue, 3 Jun 2025 12:30:55 -0400
Subject: [PATCH 01/41] wip batching implementation

---
 core/job.py           |   5 +-
 core/simulation.py    |   1 -
 core/task.py          |   5 +-
 core/workflow.py      | 131 ++++++++++++++++++++++++++++--------------
 workers/taskworker.py |   7 +++
 5 files changed, 103 insertions(+), 46 deletions(-)

diff --git a/core/job.py b/core/job.py
index c5b6ec3..aa8f9b8 100644
--- a/core/job.py
+++ b/core/job.py
@@ -84,7 +84,10 @@ def job_generate_from_workflow(self):
                                 task_cfg["EXECUTION_TIME"], 
                                 required_model_for_task, 
                                 task_cfg["INPUT_SIZE"],
-                                task_cfg["OUTPUT_SIZE"])  
+                                task_cfg["OUTPUT_SIZE"],
+                                task_cfg["MAX_BATCH_SIZE"],
+                                task_cfg["MAX_WAIT_TIME"],
+                                task_cfg["SLOWDOWN_FACTOR"])
 
             self.tasks.append(current_task)
 
diff --git a/core/simulation.py b/core/simulation.py
index 8aca6bd..93afe3a 100644
--- a/core/simulation.py
+++ b/core/simulation.py
@@ -3,7 +3,6 @@
 is referenced from Sparrow: https://github.com/radlab/sparrow 
 '''
 
-import imp
 import numpy as np
 from matplotlib import pyplot as plt
 from core.config import *
diff --git a/core/task.py b/core/task.py
index f97fdce..ba8ccff 100644
--- a/core/task.py
+++ b/core/task.py
@@ -2,7 +2,7 @@
 
 
 class Task(object):
-    def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size):
+    def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, slowdown_factor):
         self.job_id = job_id                           # id of the job the task belongs to
         self.task_id = task_id                         # id of the task itself
         # the time it takes to execute the task
@@ -12,6 +12,9 @@ def __init__(self, job_id, task_id, task_exec_duration, required_model, input_si
         # task input size to model. 
         self.input_size = input_size
         self.result_size = result_size                 # output size
+        self.max_batch_size = max_batch_size
+        self.max_wait_time = max_wait_time
+        self.slowdown_factor = slowdown_factor
         # list of Tasks (inputs) that this task requires ( list will be appended as the job generated)
         self.required_task_ids = []                        # list of task ids
         self.next_task_ids = []                            # list of task ids
diff --git a/core/workflow.py b/core/workflow.py
index 4d5fd9f..e3c6b7c 100644
--- a/core/workflow.py
+++ b/core/workflow.py
@@ -9,52 +9,67 @@
      "TASKS": [{"MODEL_NAME": "OPT",
                 "MODEL_ID": 0,
                 "TASK_INDEX": 0,
-                 "PREV_TASK_INDEX": [],
-                 "NEXT_TASK_INDEX": [1,2,3],
-                 "MODEL_SIZE": 5720000,       # in kB
-                 "INPUT_SIZE": 1,          
-                 "OUTPUT_SIZE": 2,         # in kB
-                 "EXECUTION_TIME": 561   # avg time, in ms
+                "PREV_TASK_INDEX": [],
+                "NEXT_TASK_INDEX": [1,2,3],
+                "MODEL_SIZE": 5720000,       # in kB
+                "INPUT_SIZE": 1,
+                "OUTPUT_SIZE": 2,            # in kB
+                "EXECUTION_TIME": 561,       # avg time, in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "marian",
                 "MODEL_ID": 1,
                 "TASK_INDEX": 1,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 800000,         # in kB
+                "MODEL_SIZE": 800000,        # in kB
                 "INPUT_SIZE": 2,           
                 "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 441     # in ms
+                "EXECUTION_TIME": 441,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
                 "TASK_INDEX": 2,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 2000000,        # in KB
+                "MODEL_SIZE": 2000000,       # in KB
                 "INPUT_SIZE": 2,           
                 "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 778      # in ms 
+                "EXECUTION_TIME": 778,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                 {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 2000000,        # in KB
+                "MODEL_SIZE": 2000000,       # in KB
                 "INPUT_SIZE": 2,           
                 "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 803      # in ms 
+                "EXECUTION_TIME": 803,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                 {"MODEL_NAME": "",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 4,
                 "PREV_TASK_INDEX": [1,2,3],
                 "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": 0,        # in KB
+                "MODEL_SIZE": 0,             # in KB
                 "INPUT_SIZE": 2,           
                 "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 1      # in ms 
+                "EXECUTION_TIME": 1,         # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                ]
      },
@@ -66,12 +81,15 @@
      "TASKS": [{"MODEL_NAME": "OPT",
                 "MODEL_ID": 0,
                 "TASK_INDEX": 0,
-                 "PREV_TASK_INDEX": [],
-                 "NEXT_TASK_INDEX": [1],
-                 "MODEL_SIZE": 5720000,       # in kB
-                 "INPUT_SIZE": 1,          
-                 "OUTPUT_SIZE": 2,         # in kB
-                 "EXECUTION_TIME": 560   # avg time, in ms
+                "PREV_TASK_INDEX": [],
+                "NEXT_TASK_INDEX": [1],
+                "MODEL_SIZE": 5720000,       # in kB
+                "INPUT_SIZE": 1,
+                "OUTPUT_SIZE": 2,            # in kB
+                "EXECUTION_TIME": 560,       # avg time, in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
@@ -79,9 +97,12 @@
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [],
                 "MODEL_SIZE": 2140000,       # in kB
-                "INPUT_SIZE": 1,         
+                "INPUT_SIZE": 1,
                 "OUTPUT_SIZE": 1,
-                "EXECUTION_TIME": 27     # in ms  
+                "EXECUTION_TIME": 27,        # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 }
                ]
      },
@@ -94,40 +115,52 @@
                 "TASK_INDEX": 0,
                 "PREV_TASK_INDEX": [],
                 "NEXT_TASK_INDEX": [1,2],
-                "MODEL_SIZE": 1700000,  # in kB
-                "INPUT_SIZE": 3000,  # 224 x 224 x 3 shape, assuming 64 bits representation
+                "MODEL_SIZE": 1700000,       # in kB
+                "INPUT_SIZE": 3000,          # 224 x 224 x 3 shape, assuming 64 bits representation
                 "OUTPUT_SIZE": 20,
-                "EXECUTION_TIME": 283  # avg time, in ms
+                "EXECUTION_TIME": 283,       # avg time, in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
                 "TASK_INDEX": 1,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 2140000,  # in kB
-                "INPUT_SIZE": 20,   # 299×299, assuming 64 bits representation
+                "MODEL_SIZE": 2140000,       # in kB
+                "INPUT_SIZE": 20,            # 299×299, assuming 64 bits representation
                 "OUTPUT_SIZE": 10, 
-                "EXECUTION_TIME": 26  # in ms
+                "EXECUTION_TIME": 26,        # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "txt2speech",
                 "MODEL_ID": 5,
                 "TASK_INDEX": 2,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 2700000,  # in kB
+                "MODEL_SIZE": 2700000,       # in kB
                 "INPUT_SIZE": 20,
                 "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 76  # in ms
+                "EXECUTION_TIME": 76,        # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                 {"MODEL_NAME": "aggregate",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [1,2],
                 "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": -1,  # in kB
+                "MODEL_SIZE": -1,            # in kB
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 0.2     # in ms
+                "EXECUTION_TIME": 0.2,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 }
                ]
      },
@@ -140,40 +173,52 @@
                 "TASK_INDEX": 0,
                 "PREV_TASK_INDEX": [],
                 "NEXT_TASK_INDEX": [1,2],
-                "MODEL_SIZE": -1,  # in kB
-                "INPUT_SIZE": 3000, 
+                "MODEL_SIZE": -1,            # in kB
+                "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 0.6  # avg time, in ms
+                "EXECUTION_TIME": 0.6,       # avg time, in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "DETR",
                 "MODEL_ID": 8,
                 "TASK_INDEX": 1,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 1800000,  # in kB
-                "INPUT_SIZE": 3000,  # 299×299, assuming 64 bits representation
-                "OUTPUT_SIZE": 3000,  #
-                "EXECUTION_TIME": 178  # in ms
+                "MODEL_SIZE": 1800000,       # in kB
+                "INPUT_SIZE": 3000,          # 299×299, assuming 64 bits representation
+                "OUTPUT_SIZE": 3000,
+                "EXECUTION_TIME": 178,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                {"MODEL_NAME": "Depth",
                 "MODEL_ID": 9,
                 "TASK_INDEX": 2,
                 "PREV_TASK_INDEX": [0],
                 "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 3900000,  # in kB
+                "MODEL_SIZE": 3900000,       # in kB
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 147  # in ms
+                "EXECUTION_TIME": 147,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 },
                 {"MODEL_NAME": "Aggregate",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [1,2],
                 "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": -1,  # in kB
+                "MODEL_SIZE": -1,            # in kB
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 104  # in ms
+                "EXECUTION_TIME": 104,       # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 5000,       # ms
+                "SLOWDOWN_FACTOR": 1.2       # batch execution time
                 }
                ]
      },
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 73df922..e81a0eb 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -74,12 +74,19 @@ def maybe_start_task(self, current_time):
             if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
                 # if self.worker_id == 2:
                 #     print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id))
+
+                # execute batch subject to the following constraints:
+                # batch cannot exceed max_batch_size
+                # a task cannot wait longer than max_wait_time
                 task_end_events, task_end_time = self.task_execute(
                     task, current_time)
                 self.rm_task_in_queue_history(task, current_time)
                 break
         return task_end_events
 
+    # modify to handle a batch of tasks:
+    # need to model batch execution duration
+    # transfer to next step should handle a list of tasks
     def task_execute(self, task, current_time):
         self.involved = True
         self.num_free_slots -= 1

From ff389c61da6e538c4cd0467ad7dc30ad11238a3a Mon Sep 17 00:00:00 2001
From: Ann Zhang <az275@cornell.edu>
Date: Tue, 3 Jun 2025 15:55:35 -0400
Subject: [PATCH 02/41] batching config

---
 core/job.py           |  3 ++-
 core/task.py          |  5 +++--
 core/workflow.py      | 45 ++++++++++++++++++++++++++++---------------
 workers/taskworker.py |  6 +++---
 4 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/core/job.py b/core/job.py
index aa8f9b8..6d9822f 100644
--- a/core/job.py
+++ b/core/job.py
@@ -87,7 +87,8 @@ def job_generate_from_workflow(self):
                                 task_cfg["OUTPUT_SIZE"],
                                 task_cfg["MAX_BATCH_SIZE"],
                                 task_cfg["MAX_WAIT_TIME"],
-                                task_cfg["SLOWDOWN_FACTOR"])
+                                task_cfg["BATCH_SIZES"],
+                                task_cfg["BATCH_EXEC_TIME"])
 
             self.tasks.append(current_task)
 
diff --git a/core/task.py b/core/task.py
index ba8ccff..b490e52 100644
--- a/core/task.py
+++ b/core/task.py
@@ -2,7 +2,7 @@
 
 
 class Task(object):
-    def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, slowdown_factor):
+    def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, batch_sizes, batch_exec_time):
         self.job_id = job_id                           # id of the job the task belongs to
         self.task_id = task_id                         # id of the task itself
         # the time it takes to execute the task
@@ -14,7 +14,8 @@ def __init__(self, job_id, task_id, task_exec_duration, required_model, input_si
         self.result_size = result_size                 # output size
         self.max_batch_size = max_batch_size
         self.max_wait_time = max_wait_time
-        self.slowdown_factor = slowdown_factor
+        self.batch_sizes = batch_sizes
+        self.batch_exec_time = batch_exec_time
         # list of Tasks (inputs) that this task requires ( list will be appended as the job generated)
         self.required_task_ids = []                        # list of task ids
         self.next_task_ids = []                            # list of task ids
diff --git a/core/workflow.py b/core/workflow.py
index e3c6b7c..fc8f7a4 100644
--- a/core/workflow.py
+++ b/core/workflow.py
@@ -17,7 +17,8 @@
                 "EXECUTION_TIME": 561,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [561, 561, 561, 561, 561]
                 },
                {"MODEL_NAME": "marian",
                 "MODEL_ID": 1,
@@ -30,7 +31,8 @@
                 "EXECUTION_TIME": 441,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [441, 441, 441, 441, 441]
                 },
                {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
@@ -43,7 +45,8 @@
                 "EXECUTION_TIME": 778,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [778, 778, 778, 778, 778]
                 },
                 {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
@@ -56,7 +59,8 @@
                 "EXECUTION_TIME": 803,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [803, 803, 803, 803, 803]
                 },
                 {"MODEL_NAME": "",
                 "MODEL_ID": -1,
@@ -69,7 +73,8 @@
                 "EXECUTION_TIME": 1,         # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [1, 1, 1, 1, 1]
                 },
                ]
      },
@@ -89,7 +94,8 @@
                 "EXECUTION_TIME": 560,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [560, 560, 560, 560, 560]
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
@@ -102,7 +108,8 @@
                 "EXECUTION_TIME": 27,        # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [27, 27, 27, 27, 27]
                 }
                ]
      },
@@ -121,7 +128,8 @@
                 "EXECUTION_TIME": 283,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [283, 283, 283, 283, 283]
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
@@ -134,7 +142,8 @@
                 "EXECUTION_TIME": 26,        # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [26, 26, 26, 26, 26]
                 },
                {"MODEL_NAME": "txt2speech",
                 "MODEL_ID": 5,
@@ -147,7 +156,8 @@
                 "EXECUTION_TIME": 76,        # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [76, 76, 76, 76, 76]
                 },
                 {"MODEL_NAME": "aggregate",
                 "MODEL_ID": -1,
@@ -160,7 +170,8 @@
                 "EXECUTION_TIME": 0.2,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2]
                 }
                ]
      },
@@ -179,7 +190,8 @@
                 "EXECUTION_TIME": 0.6,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6]
                 },
                {"MODEL_NAME": "DETR",
                 "MODEL_ID": 8,
@@ -192,7 +204,8 @@
                 "EXECUTION_TIME": 178,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [178, 178, 178, 178, 178]
                 },
                {"MODEL_NAME": "Depth",
                 "MODEL_ID": 9,
@@ -205,7 +218,8 @@
                 "EXECUTION_TIME": 147,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [147, 147, 147, 147, 147]
                 },
                 {"MODEL_NAME": "Aggregate",
                 "MODEL_ID": -1,
@@ -218,7 +232,8 @@
                 "EXECUTION_TIME": 104,       # in ms
                 "MAX_BATCH_SIZE": 16,
                 "MAX_WAIT_TIME": 5000,       # ms
-                "SLOWDOWN_FACTOR": 1.2       # batch execution time
+                "BATCH_SIZES": [1, 2, 4, 8, 16],
+                "BATCH_EXEC_TIME": [104, 104, 104, 104, 104]
                 }
                ]
      },
diff --git a/workers/taskworker.py b/workers/taskworker.py
index e81a0eb..879d02b 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -63,6 +63,8 @@ def schedule_job_heft(self, current_time, job):
 
     #  ---------------------------  TASK EXECUTION  ----------------------
 
+    # new event for modeling max_wait_time
+    # wake up thread in intervals of no more than max_wait_time
     def maybe_start_task(self, current_time):
         task_end_events = []
         task_list = self.get_queue_history(current_time, info_staleness=0)
@@ -75,9 +77,7 @@ def maybe_start_task(self, current_time):
                 # if self.worker_id == 2:
                 #     print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id))
 
-                # execute batch subject to the following constraints:
-                # batch cannot exceed max_batch_size
-                # a task cannot wait longer than max_wait_time
+                # form and execute batch
                 task_end_events, task_end_time = self.task_execute(
                     task, current_time)
                 self.rm_task_in_queue_history(task, current_time)

From 18741b6e613b741fdc1cbbe07821871ff79c0b23 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 4 Jun 2025 10:02:16 -0400
Subject: [PATCH 03/41] worker wake up after max wait time

---
 workers/taskworker.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index 879d02b..489c426 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -4,6 +4,7 @@
 from core.network import *
 from core.events import *
 from schedulers.algo.nav_heft_algo import *
+import time
 
 
 class TaskWorker(Worker):
@@ -14,6 +15,7 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # keep track of the queue information at time:  [ (time1,[task0,task1,]), (time2,[task1,...]),...]
         self.queue_history = []
         self.involved = False
+        self.last_batch_end_time = None
 
     def add_task(self, current_time, task):
         """
@@ -66,6 +68,8 @@ def schedule_job_heft(self, current_time, job):
     # new event for modeling max_wait_time
     # wake up thread in intervals of no more than max_wait_time
     def maybe_start_task(self, current_time):
+        latest_time = current_time
+
         task_end_events = []
         task_list = self.get_queue_history(current_time, info_staleness=0)
         # print(task_list)
@@ -80,8 +84,20 @@ def maybe_start_task(self, current_time):
                 # form and execute batch
                 task_end_events, task_end_time = self.task_execute(
                     task, current_time)
+                latest_time = max(latest_time, task_end_time) # update worker time for wake up
                 self.rm_task_in_queue_history(task, current_time)
                 break
+
+        self.last_batch_end_time = latest_time
+
+        # print(current_time)
+        self.simulation.event_queue.put(
+            EventOrders(
+                latest_time + WorkerWakeUpEvent.MAX_WAIT_TIME,
+                WorkerWakeUpEvent(self)
+            )
+        )
+
         return task_end_events
 
     # modify to handle a batch of tasks:
@@ -103,6 +119,9 @@ def task_execute(self, task, current_time):
         task.log.task_front_queue_timestamp = current_time
         task.log.task_execution_start_timestamp = current_time + model_fetch_time
         task.log.task_execution_end_timestamp = task_end_time
+
+        # print(f"curr: {current_time}, end: {task_end_time}")
+
         return task_end_events, task_end_time
 
     #  ---------------------------  Subsequent TASK Transfer   --------------------

From 04fefb905e72732207c9b0a038ead1b6478a973e Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 4 Jun 2025 10:02:42 -0400
Subject: [PATCH 04/41] wake up event

---
 core/events.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/core/events.py b/core/events.py
index abef882..5320118 100644
--- a/core/events.py
+++ b/core/events.py
@@ -200,6 +200,30 @@ def to_string(self):
         return "[Job End] ==="
 
 
+class WorkerWakeUpEvent(Event):
+    """
+    Event to signify that max_wait_time has passed and worker should
+    check task queue.
+    """
+
+    # TODO: Get max wait time
+    MAX_WAIT_TIME = 50 # ms
+
+    def __init__(self, worker):
+        self.worker = worker
+
+    def run(self, current_time):
+        if self.will_run(current_time):
+            return self.worker.maybe_start_task(current_time)
+        return []
+
+    def to_string(self):
+        return f"[Worker (id: {self.worker.worker_id}) Wake Up]"
+    
+    def will_run(self, current_time):
+        return (self.worker.last_batch_end_time + self.MAX_WAIT_TIME) == current_time
+
+
 class EventOrders:
     """
     Used so that the Simulation keeps track of the priority queue order

From aa7eb1cd99effd7168fec9fec78122a8b3e38d5c Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 4 Jun 2025 10:03:27 -0400
Subject: [PATCH 05/41] event logging

---
 core/simulation.py                               |  2 ++
 experiments/run_experiments.py                   | 10 ++++++++++
 schedulers/centralized/simulation_central.py     |  4 ++++
 schedulers/decentralized/simulation_decentral.py |  6 ++++++
 4 files changed, 22 insertions(+)

diff --git a/core/simulation.py b/core/simulation.py
index 93afe3a..22b3f1b 100644
--- a/core/simulation.py
+++ b/core/simulation.py
@@ -43,6 +43,8 @@ def __init__(
         # Tracking measurements
         self.result_to_export = pd.DataFrame()
         self.tasks_logging_times = pd.DataFrame()
+        self.event_log = pd.DataFrame(columns=["time", "event"])
+
         print("---- SIMULATION : " + self.simulation_name + "----")
         self.produce_breakdown =  produce_breakdown
 
diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py
index 7630b43..6fe6034 100644
--- a/experiments/run_experiments.py
+++ b/experiments/run_experiments.py
@@ -51,6 +51,9 @@
                                     num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list)
         sim.run()
 
+        event_log = sim.event_log
+        event_log.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "events_by_time.csv")
+
         # result_to_export = sim.result_to_export
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "loadDelay_" + str(
@@ -61,6 +64,9 @@
         sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK",
                                     num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list)
         sim.run()
+
+        event_log = sim.event_log
+        event_log.to_csv(OUTPUT_FILE_NAMES["hashtask"] + "events_by_time.csv")
         
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["hashtask"] + "loadDelay_" + str(
@@ -80,6 +86,10 @@
         # dataframe = sim.result_to_export
         # dataframe.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str(
         #     LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv")
+        
+        event_log = sim.event_log
+        event_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "events_by_time.csv")
+        
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str(
             LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv")
diff --git a/schedulers/centralized/simulation_central.py b/schedulers/centralized/simulation_central.py
index 13def63..3754077 100644
--- a/schedulers/centralized/simulation_central.py
+++ b/schedulers/centralized/simulation_central.py
@@ -52,6 +52,10 @@ def run(self):
         last_time = 0
         while self.remaining_jobs > 0:
             cur_event = self.event_queue.get()
+
+            if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time):
+                self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
+
             assert cur_event.current_time >= last_time
             last_time = cur_event.current_time
             new_events = cur_event.event.run(cur_event.current_time)
diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py
index b716d7e..0ba0f22 100644
--- a/schedulers/decentralized/simulation_decentral.py
+++ b/schedulers/decentralized/simulation_decentral.py
@@ -1,3 +1,5 @@
+import pandas as pd
+
 from queue import PriorityQueue
 
 from core.simulation import *
@@ -48,6 +50,10 @@ def run(self):
         last_time = 0
         while self.remaining_jobs > 0:
             cur_event = self.event_queue.get()
+
+            if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time):
+                self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
+
             assert cur_event.current_time >= last_time
             last_time = cur_event.current_time
             new_events = cur_event.event.run(cur_event.current_time)

From d4ae0f1faa5d0650bd0963400e78316d581559e2 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 4 Jun 2025 17:08:38 -0400
Subject: [PATCH 06/41] batching

---
 core/events.py        |  43 +++++++++++---
 workers/taskworker.py | 131 ++++++++++++++++++++++++++++++------------
 2 files changed, 127 insertions(+), 47 deletions(-)

diff --git a/core/events.py b/core/events.py
index 5320118..d7515bd 100644
--- a/core/events.py
+++ b/core/events.py
@@ -166,6 +166,22 @@ def to_string(self):
         return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id)
 
 
+class BatchEndEvent(Event):
+    """ Event to signify that a BATCH has been performed by the WORKER. """
+
+    def __init__(self, worker, job_ids=[], task_id=-1):
+        self.worker = worker
+        self.job_ids = job_ids    # integers representing the job_ids
+        self.task_id = task_id  # integer representing the task_id
+
+    def run(self, current_time):
+        return self.worker.free_slot(current_time)
+
+    def to_string(self):
+        jobs = ",".join([str(id) for id in self.job_ids])
+        return f"[Batch End (Task {self.task_id}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
+
+
 # for PER_JOB scheduler
 class JobAssignEvent(Event):
     """
@@ -206,22 +222,31 @@ class WorkerWakeUpEvent(Event):
     check task queue.
     """
 
-    # TODO: Get max wait time
-    MAX_WAIT_TIME = 50 # ms
-
-    def __init__(self, worker):
+    def __init__(self, worker, task_id, task_max_wait_time):
         self.worker = worker
+        self.task_id = task_id
+        self.task_max_wait_time = task_max_wait_time
 
     def run(self, current_time):
-        if self.will_run(current_time):
-            return self.worker.maybe_start_task(current_time)
-        return []
+        # print(f"RUN: {self.will_run(current_time)}")
+        # if not self.will_run(current_time):
+        #     print(self.worker.last_queue_check_times)
+        #     print(f"EXPECT: {self.worker.last_queue_check_times[self.task_id] + self.task_max_wait_time}; ONLY AT {current_time}")
+
+        # if self.will_run(current_time):
+        _, task_end_events = self.worker.maybe_start_task_for_type(
+            current_time, self.task_id, self.task_max_wait_time, True
+        )
+        return task_end_events
+        # return []
 
     def to_string(self):
-        return f"[Worker (id: {self.worker.worker_id}) Wake Up]"
+        return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]"
     
     def will_run(self, current_time):
-        return (self.worker.last_batch_end_time + self.MAX_WAIT_TIME) == current_time
+        if self.task_id in self.worker.next_check_times:
+            return current_time >= self.worker.next_check_times[self.task_id]
+        return True # if no batch has been run yet, wake up should be executed
 
 
 class EventOrders:
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 489c426..c49e1cc 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -15,21 +15,25 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # keep track of the queue information at time:  [ (time1,[task0,task1,]), (time2,[task1,...]),...]
         self.queue_history = []
         self.involved = False
-        self.last_batch_end_time = None
+        self.next_check_times = {}
 
     def add_task(self, current_time, task):
         """
         Add task into the local task queue
         """
+
+        # print(f"[{current_time}] W{self.worker_id}: T{task.task_id} arrived")
+
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
         self.add_task_to_queue_history(task, current_time)
-        return self.maybe_start_task(current_time)
+        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time, False)
+        return task_end_events
 
     def free_slot(self, current_time):
         """ Frees a slot on the worker and attempts to launch another task in that slot. """
         self.num_free_slots += 1
-        get_task_events = self.maybe_start_task(current_time)
+        get_task_events = self.maybe_start_task_any(current_time)
         return get_task_events
 
     #  --------------------------- DECENTRALIZED WORKER SCHEDULING  ----------------------
@@ -65,62 +69,113 @@ def schedule_job_heft(self, current_time, job):
 
     #  ---------------------------  TASK EXECUTION  ----------------------
 
-    # new event for modeling max_wait_time
-    # wake up thread in intervals of no more than max_wait_time
-    def maybe_start_task(self, current_time):
+    def maybe_start_task_any(self, current_time):
+        task_list = self.get_queue_history(current_time, info_staleness=0)
+
+        queued_tasks = queue.Queue()
+        [queued_tasks.put(task) for task in task_list]
+        while (not queued_tasks.empty()) and self.num_free_slots > 0:
+            task = queued_tasks.get()
+            if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
+                did_exec_batch, task_end_events = self.maybe_start_task_for_type(
+                    current_time, task.task_id, task.max_wait_time, False
+                )
+                if did_exec_batch:
+                    return task_end_events
+                # keep checking queue until batch is executed or tasks run out
+
+        # if no queued tasks, maybe is never called and no wake up events are
+        # appended; assume in this case worker will be woken up when any new 
+        # task arrives
+        return []
+    
+
+    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]:
+        """
+            Returns did_exec_batch : bool, task_end_events : list[Event]
+        """
         latest_time = current_time
+        did_exec_batch = False
 
         task_end_events = []
-        task_list = self.get_queue_history(current_time, info_staleness=0)
-        # print(task_list)
+        task_list = [task for task in self.get_queue_history(current_time, info_staleness=0) 
+                     if task.task_id == task_type]
+        
         queued_tasks = queue.Queue()
         [queued_tasks.put(task) for task in task_list]
-        while (not queued_tasks.empty()) and self.num_free_slots > 0:
+
+        batch = []
+        while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size:
             task = queued_tasks.get()
             if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
-                # if self.worker_id == 2:
-                #     print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id))
+                batch.append(task)
+        
+        # full batch or max wait time has passed
+        if len(task_list) > 0 and self.num_free_slots > 0 \
+            and (do_exec_batch or len(batch) >= task_list[0].max_batch_size):
 
-                # form and execute batch
-                task_end_events, task_end_time = self.task_execute(
-                    task, current_time)
-                latest_time = max(latest_time, task_end_time) # update worker time for wake up
+            batch_end_events, task_end_time = self.batch_execute(
+                batch, current_time)
+            
+            # rm all tasks in batch
+            for task in batch:
                 self.rm_task_in_queue_history(task, current_time)
-                break
 
-        self.last_batch_end_time = latest_time
+            latest_time = task_end_time
 
-        # print(current_time)
-        self.simulation.event_queue.put(
+            did_exec_batch = True
+            task_end_events += batch_end_events
+
+        next_check_time = latest_time + task_wait_time
+
+        # if idle, check again in wait time
+        task_end_events.append(
             EventOrders(
-                latest_time + WorkerWakeUpEvent.MAX_WAIT_TIME,
-                WorkerWakeUpEvent(self)
+                next_check_time,
+                WorkerWakeUpEvent(self, task_type, task_wait_time)
             )
         )
+        self.next_check_times[task_type] = next_check_time
 
-        return task_end_events
+        return did_exec_batch, task_end_events
 
     # modify to handle a batch of tasks:
     # need to model batch execution duration
     # transfer to next step should handle a list of tasks
-    def task_execute(self, task, current_time):
+    def batch_execute(self, tasks, current_time):
         self.involved = True
         self.num_free_slots -= 1
-        model_fetch_time = self.fetch_model(task.model, current_time)
-        task_end_time = current_time + model_fetch_time + task.task_exec_duration
-        events = self.send_result_to_next_workers(
-            task_end_time, task)
-        task_end_events = events
-        task_end_events.append(EventOrders(task_end_time, TaskEndEvent(
-            self, job_id=task.job_id, task_id=task.task_id)))
-        self.simulation.add_job_completion_time(
-            task.job_id, task.task_id, task_end_time)
-        # task log tracking
-        task.log.task_front_queue_timestamp = current_time
-        task.log.task_execution_start_timestamp = current_time + model_fetch_time
-        task.log.task_execution_end_timestamp = task_end_time
-
-        # print(f"curr: {current_time}, end: {task_end_time}")
+        model_fetch_time = self.fetch_model(tasks[0].model, current_time)
+
+        batch_index = 0
+        for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): # assumes batch_sizes are sorted
+            if len(tasks) <= batch_size:
+                batch_index = i
+                break
+
+        task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index]
+        task_end_events = []
+
+        job_ids = []
+
+        for task in tasks:
+            events = self.send_result_to_next_workers(
+                task_end_time, task)
+            task_end_events += events
+
+            self.simulation.add_job_completion_time(
+                task.job_id, task.task_id, task_end_time)
+            
+            job_ids.append(task.job_id)
+        
+            # task log tracking
+            task.log.task_front_queue_timestamp = current_time
+            task.log.task_execution_start_timestamp = current_time + model_fetch_time
+            task.log.task_execution_end_timestamp = task_end_time
+
+        task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
+            self, job_ids=job_ids, task_id=tasks[0].task_id
+        )))
 
         return task_end_events, task_end_time
 

From b9dcf0265616ad600cbae414371ad9731d3102e9 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 5 Jun 2025 10:49:40 -0400
Subject: [PATCH 07/41] comments

---
 core/events.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/core/events.py b/core/events.py
index d7515bd..db35834 100644
--- a/core/events.py
+++ b/core/events.py
@@ -228,22 +228,18 @@ def __init__(self, worker, task_id, task_max_wait_time):
         self.task_max_wait_time = task_max_wait_time
 
     def run(self, current_time):
-        # print(f"RUN: {self.will_run(current_time)}")
-        # if not self.will_run(current_time):
-        #     print(self.worker.last_queue_check_times)
-        #     print(f"EXPECT: {self.worker.last_queue_check_times[self.task_id] + self.task_max_wait_time}; ONLY AT {current_time}")
-
-        # if self.will_run(current_time):
-        _, task_end_events = self.worker.maybe_start_task_for_type(
-            current_time, self.task_id, self.task_max_wait_time, True
-        )
-        return task_end_events
-        # return []
+        if self.will_run(current_time):
+            _, task_end_events = self.worker.maybe_start_task_for_type(
+                current_time, self.task_id, self.task_max_wait_time, True
+            )
+            return task_end_events
+        return []
 
     def to_string(self):
         return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]"
     
     def will_run(self, current_time):
+        # skip current wake up if a later wake up has been scheduled
         if self.task_id in self.worker.next_check_times:
             return current_time >= self.worker.next_check_times[self.task_id]
         return True # if no batch has been run yet, wake up should be executed

From 7eba498553466121888b7ebae3777c590fa79bfd Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 5 Jun 2025 10:55:10 -0400
Subject: [PATCH 08/41] wake up enqueue bug fix and comments

---
 workers/taskworker.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index c49e1cc..6e514a1 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -85,13 +85,15 @@ def maybe_start_task_any(self, current_time):
                 # keep checking queue until batch is executed or tasks run out
 
         # if no queued tasks, maybe is never called and no wake up events are
-        # appended; assume in this case worker will be woken up when any new 
-        # task arrives
+        # appended; in this case worker sleeps until a new task arrives
         return []
     
 
     def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]:
         """
+            Execute a batch if 1) a batch of size max_batch_size can be created or 2) do_exec_batch is True
+            (do_exec_batch should be True when maybe is called by a wake up event)
+
             Returns did_exec_batch : bool, task_end_events : list[Event]
         """
         latest_time = current_time
@@ -104,6 +106,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_
         queued_tasks = queue.Queue()
         [queued_tasks.put(task) for task in task_list]
 
+        # form largest batch <= max_batch_size possible
         batch = []
         while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size:
             task = queued_tasks.get()
@@ -126,16 +129,19 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_
             did_exec_batch = True
             task_end_events += batch_end_events
 
+        # track next wake up time so old wake ups can be skipped
         next_check_time = latest_time + task_wait_time
+        self.next_check_times[task_type] = next_check_time
 
         # if idle, check again in wait time
-        task_end_events.append(
+        # NOTE: for some reason, appending to task_end_events does not always
+        # lead to event being enqueued; thus we enqueue directly to sim queue here
+        self.simulation.event_queue.put(
             EventOrders(
                 next_check_time,
                 WorkerWakeUpEvent(self, task_type, task_wait_time)
             )
         )
-        self.next_check_times[task_type] = next_check_time
 
         return did_exec_batch, task_end_events
 
@@ -148,15 +154,15 @@ def batch_execute(self, tasks, current_time):
         model_fetch_time = self.fetch_model(tasks[0].model, current_time)
 
         batch_index = 0
-        for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): # assumes batch_sizes are sorted
-            if len(tasks) <= batch_size:
+        for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)):
+            if len(tasks) <= batch_size: # choose smallest batch size >= len(tasks)
                 batch_index = i
                 break
 
         task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index]
         task_end_events = []
 
-        job_ids = []
+        job_ids = [] # for logging
 
         for task in tasks:
             events = self.send_result_to_next_workers(

From cc2b1df779cc7b98d50dde6d0a451112e489b77a Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 5 Jun 2025 10:55:30 -0400
Subject: [PATCH 09/41] produce breakdown

---
 experiments/run_experiments.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py
index 6fe6034..a14154e 100644
--- a/experiments/run_experiments.py
+++ b/experiments/run_experiments.py
@@ -48,7 +48,8 @@
     # 2. Run and collect data
     if "centralheft" in experiment_schedulers:
         sim = Simulation_central(simulation_name="centralheft", job_split="PER_TASK",
-                                    num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list)
+                                    num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list, 
+                                    produce_breakdown=True)
         sim.run()
 
         event_log = sim.event_log
@@ -62,7 +63,8 @@
     if "hashtask" in experiment_schedulers:
         OUTPUT_FILENAME = "hashtask"
         sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK",
-                                    num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list)
+                                    num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list,
+                                    produce_breakdown=True)
         sim.run()
 
         event_log = sim.event_log

From 2e656717978a5b14e1860406b8bdb38c376c5925 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 08:16:52 -0400
Subject: [PATCH 10/41] start batch whenever possible

---
 core/events.py        |  2 +-
 workers/taskworker.py | 13 +++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/core/events.py b/core/events.py
index db35834..269b9e5 100644
--- a/core/events.py
+++ b/core/events.py
@@ -230,7 +230,7 @@ def __init__(self, worker, task_id, task_max_wait_time):
     def run(self, current_time):
         if self.will_run(current_time):
             _, task_end_events = self.worker.maybe_start_task_for_type(
-                current_time, self.task_id, self.task_max_wait_time, True
+                current_time, self.task_id, self.task_max_wait_time
             )
             return task_end_events
         return []
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 6e514a1..43a3434 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -27,7 +27,7 @@ def add_task(self, current_time, task):
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
         self.add_task_to_queue_history(task, current_time)
-        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time, False)
+        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time)
         return task_end_events
 
     def free_slot(self, current_time):
@@ -78,7 +78,7 @@ def maybe_start_task_any(self, current_time):
             task = queued_tasks.get()
             if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
                 did_exec_batch, task_end_events = self.maybe_start_task_for_type(
-                    current_time, task.task_id, task.max_wait_time, False
+                    current_time, task.task_id, task.max_wait_time
                 )
                 if did_exec_batch:
                     return task_end_events
@@ -89,10 +89,9 @@ def maybe_start_task_any(self, current_time):
         return []
     
 
-    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]:
+    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]:
         """
-            Execute a batch if 1) a batch of size max_batch_size can be created or 2) do_exec_batch is True
-            (do_exec_batch should be True when maybe is called by a wake up event)
+            Execute a batch if there are free slots available and at least 1 task queued.
 
             Returns did_exec_batch : bool, task_end_events : list[Event]
         """
@@ -114,9 +113,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_
                 batch.append(task)
         
         # full batch or max wait time has passed
-        if len(task_list) > 0 and self.num_free_slots > 0 \
-            and (do_exec_batch or len(batch) >= task_list[0].max_batch_size):
-
+        if len(task_list) > 0 and self.num_free_slots > 0:
             batch_end_events, task_end_time = self.batch_execute(
                 batch, current_time)
             

From c813826d0cd1042404aa5d50d36ba5c0d640eebb Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 10:31:22 -0400
Subject: [PATCH 11/41] separate tasks for each task type; updated queue wait
 time estimates

---
 workers/taskworker.py | 127 +++++++++++++++++++++++++-----------------
 1 file changed, 76 insertions(+), 51 deletions(-)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index 43a3434..296bcb6 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -4,7 +4,7 @@
 from core.network import *
 from core.events import *
 from schedulers.algo.nav_heft_algo import *
-import time
+import itertools
 
 
 class TaskWorker(Worker):
@@ -13,7 +13,7 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # {task_obj1:[(preq_task_id0,arrival_time0), (preq_taks_id0, arrival_time1), ...], task2:[( ...],}
         self.waiting_tasks_buffer = defaultdict(lambda: [])
         # keep track of the queue information at time:  [ (time1,[task0,task1,]), (time2,[task1,...]),...]
-        self.queue_history = []
+        self.queue_history = {}
         self.involved = False
         self.next_check_times = {}
 
@@ -69,21 +69,39 @@ def schedule_job_heft(self, current_time, job):
 
     #  ---------------------------  TASK EXECUTION  ----------------------
 
-    def maybe_start_task_any(self, current_time):
-        task_list = self.get_queue_history(current_time, info_staleness=0)
+    def get_sorted_task_ids(self, current_time) -> tuple[list[int], dict[int, list[Task]]]:
+        """
+            Returns a list of all task_ids with at least 1 task queued on this
+            worker in order of when they are scheduled to execute (e.g. task queue
+            at index 0 is the next to be executed when a slot opens up on the worker)
+            in addition to a map of all task_ids to their task queues.
+        """
 
-        queued_tasks = queue.Queue()
-        [queued_tasks.put(task) for task in task_list]
-        while (not queued_tasks.empty()) and self.num_free_slots > 0:
-            task = queued_tasks.get()
-            if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
-                did_exec_batch, task_end_events = self.maybe_start_task_for_type(
-                    current_time, task.task_id, task.max_wait_time
-                )
-                if did_exec_batch:
-                    return task_end_events
-                # keep checking queue until batch is executed or tasks run out
+        task_ids = self.queue_history.keys()
+        task_queues = { task_id: self.get_queue_history(current_time, task_id) for task_id in task_ids }
+        
+        task_ids_by_arrival = sorted(
+            filter(lambda task_id: len(task_queues[task_id]) > 0, task_ids),
+            key=lambda task_id: task_queues[task_id][0].log.task_placed_on_worker_queue_timestamp,
+        )
+
+        return task_ids_by_arrival, task_queues
 
+
+    def maybe_start_task_any(self, current_time):
+        task_ids, task_queues = self.get_sorted_task_ids(current_time)
+        
+        if self.num_free_slots > 0:
+            for task_id in task_ids:
+                first_queued_task = task_queues[task_id][0]
+                if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp):
+                    did_exec_batch, task_end_events = self.maybe_start_task_for_type(
+                        current_time, task_id, first_queued_task.max_wait_time
+                    )
+                    if did_exec_batch:
+                        return task_end_events
+                    # keep checking queue until batch is executed or tasks run out
+        
         # if no queued tasks, maybe is never called and no wake up events are
         # appended; in this case worker sleeps until a new task arrives
         return []
@@ -99,8 +117,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) ->
         did_exec_batch = False
 
         task_end_events = []
-        task_list = [task for task in self.get_queue_history(current_time, info_staleness=0) 
-                     if task.task_id == task_type]
+        task_list = self.get_queue_history(current_time, task_type, info_staleness=0)
         
         queued_tasks = queue.Queue()
         [queued_tasks.put(task) for task in task_list]
@@ -236,24 +253,25 @@ def receive_intermediate_result(self, current_time, prev_task, cur_task) -> list
     # ------------------------- queue history update helper functions ---------------
 
     def add_task_to_queue_history(self, task, current_time):
-        last_index = len(self.queue_history) - 1
-        # 0. base case
-        if last_index == -1:
-            self.queue_history.append((current_time, [task]))
+        # 0. Base case (first entry)
+        if task.task_id not in self.queue_history:
+            self.queue_history[task.task_id] = [(current_time, [task])]
             return
+
         # 1. Find the time_stamp place to add this queue information
+        last_index = len(self.queue_history[task.task_id]) - 1
         while last_index >= 0:
-            if self.queue_history[last_index][0] == current_time:
-                if task not in self.queue_history[last_index][1]:
-                    self.queue_history[last_index][1].append(task)
+            if self.queue_history[task.task_id][last_index][0] == current_time:
+                if task not in self.queue_history[task.task_id][last_index][1]:
+                    self.queue_history[task.task_id][last_index][1].append(task)
                 break
-            if self.queue_history[last_index][0] < current_time:
+            if self.queue_history[task.task_id][last_index][0] < current_time:
                 # print("2")
-                if task not in self.queue_history[last_index][1]:
-                    next_queue = self.queue_history[last_index][1].copy()
+                if task not in self.queue_history[task.task_id][last_index][1]:
+                    next_queue = self.queue_history[task.task_id][last_index][1].copy()
                     next_queue.append(task)
                     last_index += 1
-                    self.queue_history.insert(
+                    self.queue_history[task.task_id].insert(
                         last_index, (current_time, next_queue)
                     )
                 break
@@ -261,47 +279,54 @@ def add_task_to_queue_history(self, task, current_time):
             last_index -= 1
 
         # 2. added the task to all the subsequent timestamp tuples
-        while last_index < len(self.queue_history):
-            if task not in self.queue_history[last_index][1]:
-                self.queue_history[last_index][1].append(task)
+        while last_index < len(self.queue_history[task.task_id]):
+            if task not in self.queue_history[task.task_id][last_index][1]:
+                self.queue_history[task.task_id][last_index][1].append(task)
             last_index += 1
 
     def rm_task_in_queue_history(self, task, current_time):
-        last_index = len(self.queue_history) - 1
         # 0. base case: shouldn't happen
-        if last_index == -1:
+        if task.task_id not in self.queue_history:
             AssertionError("rm model cached location to an empty list")
             return
+
+        last_index = len(self.queue_history[task.task_id]) - 1
+        
         # 1. find the place to add this remove_event to the tuple list
         while last_index >= 0:
-            if self.queue_history[last_index][0] == current_time:
-                if task in self.queue_history[last_index][1]:
-                    self.queue_history[last_index][1].remove(task)
+            if self.queue_history[task.task_id][last_index][0] == current_time:
+                if task in self.queue_history[task.task_id][last_index][1]:
+                    self.queue_history[task.task_id][last_index][1].remove(task)
                 break
-            if self.queue_history[last_index][0] < current_time:
-                if task in self.queue_history[last_index][1]:
-                    next_tasks_in_queue = self.queue_history[last_index][1].copy()
+            if self.queue_history[task.task_id][last_index][0] < current_time:
+                if task in self.queue_history[task.task_id][last_index][1]:
+                    next_tasks_in_queue = self.queue_history[task.task_id][last_index][1].copy()
                     next_tasks_in_queue.remove(task)
                     last_index = last_index + 1
-                    self.queue_history.insert(
+                    self.queue_history[task.task_id].insert(
                         last_index, (current_time, next_tasks_in_queue)
                     )
                 break
             last_index -= 1  # go to prev time
         # 2. remove the task from all the subsequent tuple
-        while last_index < len(self.queue_history):
-            if task in self.queue_history[last_index]:
-                self.queue_history[last_index][1].remove(task)
+        while last_index < len(self.queue_history[task.task_id]):
+            if task in self.queue_history[task.task_id][last_index]:
+                self.queue_history[task.task_id][last_index][1].remove(task)
             last_index += 1  # do this for the remaining element after
 
-    def get_queue_history(self, current_time, info_staleness=0) -> list:
-        return self.get_history(self.queue_history, current_time, info_staleness)
+    def get_queue_history(self, current_time, task_id, info_staleness=0) -> list:
+        return self.get_history(self.queue_history[task_id], current_time, info_staleness)
 
-    def get_task_queue_waittime(self, current_time, info_staleness=0, requiring_worker_id=None):
+    def get_task_queue_waittime(self, current_time, task_id, info_staleness=0, requiring_worker_id=None):
         if requiring_worker_id != None and requiring_worker_id != self.worker_id:
             info_staleness = 0
-        queueing_tasks = self.get_queue_history(current_time, info_staleness)
-        waittime = 0
-        for task in queueing_tasks:
-            waittime += task.task_exec_duration
-        return waittime
+
+        task_ids, task_queues = self.get_sorted_task_ids(current_time)
+
+        wait_time = 0
+        for queued_task_id in task_ids:
+            for task in task_queues[queued_task_id]:
+                wait_time += task.task_exec_duration
+            if queued_task_id == task_id:
+                return wait_time
+        return wait_time

From 5de0c95e2ca1f31770961d02a8b48ccfdef51f18 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 10:32:05 -0400
Subject: [PATCH 12/41] per task type wait times

---
 schedulers/algo/nav_heft_algo.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py
index eb54589..e87656e 100644
--- a/schedulers/algo/nav_heft_algo.py
+++ b/schedulers/algo/nav_heft_algo.py
@@ -83,16 +83,19 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
         workers[worker.worker_id] = worker
     sorted_tasks = ranking_tasks(job)
     workers_to_select = [w.worker_id for w in worker_list]
-    workers_EAT = {}   # worker_id -> earliest_available_time
+    workers_EAT = {}   # worker_id -> (task_id -> earliest_available_time)
     workers_available_memory = {}  # worker_id -> available_memory
     # 1. initialize the earliest available time and memory for each worker
     for worker_id in workers_to_select:
-        cur_worker_waittime = 0
-        if consider_load:
-            cur_worker_waittime = workers[worker_id].get_task_queue_waittime(current_time, \
-                                                                             info_staleness=LOAD_INFORMATION_STALENESS, \
-                                                                             requiring_worker_id=initial_worker_id)
-        workers_EAT[worker_id] = current_time + cur_worker_waittime
+        workers_EAT[worker_id] = {
+            task_id: current_time + (workers[worker_id].get_task_queue_waittime(
+                current_time,
+                task_id,
+                info_staleness=LOAD_INFORMATION_STALENESS,
+                requiring_worker_id=initial_worker_id) if consider_load else 0)
+            for task_id in sorted_tasks
+        }
+        
         available_memory = GPU_MEMORY_SIZE
         if consider_cache:
             available_memory = workers[worker_id].used_GPUmemory(current_time, \
@@ -109,7 +112,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
         fetching_model_size = 0
         for cur_worker_id in workers_to_select:
             # 2.0 consider the current worker queue wait time to determine its earliest start time
-            cur_earliest_start_time = workers_EAT[cur_worker_id]
+            cur_earliest_start_time = workers_EAT[cur_worker_id][task_id]
             # 2.1 calculate the inputs arrival time
             inputs_arrival_time = 0
             if cur_task.task_id == 0 and initial_worker_id is not None and cur_worker_id != initial_worker_id:
@@ -142,7 +145,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
                 fetching_model_size = cur_fetching_model_size
         # 3. pick the worker with ealiest start time
         cur_task_finish_time = earliest_start_time + job.tasks[task_id].task_exec_duration
-        workers_EAT[selected_worker_id] = cur_task_finish_time
+        workers_EAT[selected_worker_id][task_id] = cur_task_finish_time
         allocated_tasks_info[task_id] = (selected_worker_id,  cur_task_finish_time)
         if workers_available_memory[selected_worker_id] >= fetching_model_size:
             workers_available_memory[selected_worker_id] -= fetching_model_size
@@ -155,6 +158,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
 def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_id, allocated_worker_id) -> int:
     # 1. check assigned worker wait_time to decide if need to adjust assigned worker
     cur_wait_time = workers[allocated_worker_id].get_task_queue_waittime(current_time, \
+                                                                         task_id, \
                                                                         info_staleness=LOAD_INFORMATION_STALENESS, \
                                                                         requiring_worker_id=local_worker_id)
     cur_task = job.tasks[task_id]
@@ -167,6 +171,7 @@ def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_i
     earliest_start_time = float('inf')
     for cur_worker in workers:
         wait_time = cur_worker.get_task_queue_waittime(current_time, \
+                                                       task_id, \
                                                        info_staleness=LOAD_INFORMATION_STALENESS, \
                                                        requiring_worker_id=local_worker_id)
         cur_earliest_start_time = current_time + wait_time

From c3a8e436f40f8b0fe05ad3cd628a661673cb8397 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 13:59:17 -0400
Subject: [PATCH 13/41] task type bug fix

---
 core/events.py        |  6 +--
 core/job.py           |  1 +
 core/task.py          |  5 ++-
 workers/taskworker.py | 99 ++++++++++++++++++++++---------------------
 4 files changed, 58 insertions(+), 53 deletions(-)

diff --git a/core/events.py b/core/events.py
index 269b9e5..f3480d6 100644
--- a/core/events.py
+++ b/core/events.py
@@ -169,17 +169,17 @@ def to_string(self):
 class BatchEndEvent(Event):
     """ Event to signify that a BATCH has been performed by the WORKER. """
 
-    def __init__(self, worker, job_ids=[], task_id=-1):
+    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
         self.worker = worker
         self.job_ids = job_ids    # integers representing the job_ids
-        self.task_id = task_id  # integer representing the task_id
+        self.task_type = task_type # (workflow_id, task_id)
 
     def run(self, current_time):
         return self.worker.free_slot(current_time)
 
     def to_string(self):
         jobs = ",".join([str(id) for id in self.job_ids])
-        return f"[Batch End (Task {self.task_id}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
+        return f"[Batch End (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
 
 
 # for PER_JOB scheduler
diff --git a/core/job.py b/core/job.py
index 6d9822f..434c397 100644
--- a/core/job.py
+++ b/core/job.py
@@ -81,6 +81,7 @@ def job_generate_from_workflow(self):
 
             current_task = Task(self.id,  # ID of the associated unique Job
                                 task_cfg["TASK_INDEX"],  # taskID
+                                (self.job_type_id, task_cfg["TASK_INDEX"]), # task type
                                 task_cfg["EXECUTION_TIME"], 
                                 required_model_for_task, 
                                 task_cfg["INPUT_SIZE"],
diff --git a/core/task.py b/core/task.py
index b490e52..61c478a 100644
--- a/core/task.py
+++ b/core/task.py
@@ -2,9 +2,12 @@
 
 
 class Task(object):
-    def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, batch_sizes, batch_exec_time):
+    def __init__(self, job_id, task_id, task_type, task_exec_duration, 
+                 required_model, input_size, result_size, max_batch_size, 
+                 max_wait_time, batch_sizes, batch_exec_time):
         self.job_id = job_id                           # id of the job the task belongs to
         self.task_id = task_id                         # id of the task itself
+        self.task_type = task_type                     # (workflow_id, task_id)
         # the time it takes to execute the task
         self.task_exec_duration = task_exec_duration
         # required model_id to execute the task. None if it is a computation task that doesn't involve ML model
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 296bcb6..b9211d2 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -4,7 +4,6 @@
 from core.network import *
 from core.events import *
 from schedulers.algo.nav_heft_algo import *
-import itertools
 
 
 class TaskWorker(Worker):
@@ -22,12 +21,12 @@ def add_task(self, current_time, task):
         Add task into the local task queue
         """
 
-        # print(f"[{current_time}] W{self.worker_id}: T{task.task_id} arrived")
+        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
 
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
         self.add_task_to_queue_history(task, current_time)
-        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time)
+        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time)
         return task_end_events
 
     def free_slot(self, current_time):
@@ -69,34 +68,34 @@ def schedule_job_heft(self, current_time, job):
 
     #  ---------------------------  TASK EXECUTION  ----------------------
 
-    def get_sorted_task_ids(self, current_time) -> tuple[list[int], dict[int, list[Task]]]:
+    def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(int, int)], dict[(int, int), list[Task]]]:
         """
-            Returns a list of all task_ids with at least 1 task queued on this
+            Returns a list of all task_types with at least 1 task queued on this
             worker in order of when they are scheduled to execute (e.g. task queue
             at index 0 is the next to be executed when a slot opens up on the worker)
-            in addition to a map of all task_ids to their task queues.
+            in addition to a map of all task_types to their task queues.
         """
 
-        task_ids = self.queue_history.keys()
-        task_queues = { task_id: self.get_queue_history(current_time, task_id) for task_id in task_ids }
+        task_types = self.queue_history.keys()
+        task_queues = { task_type: self.get_queue_history(current_time, task_type, info_staleness) for task_type in task_types }
         
-        task_ids_by_arrival = sorted(
-            filter(lambda task_id: len(task_queues[task_id]) > 0, task_ids),
-            key=lambda task_id: task_queues[task_id][0].log.task_placed_on_worker_queue_timestamp,
+        task_types_by_arrival = sorted(
+            filter(lambda task_type: len(task_queues[task_type]) > 0, task_types),
+            key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp,
         )
 
-        return task_ids_by_arrival, task_queues
+        return task_types_by_arrival, task_queues
 
 
     def maybe_start_task_any(self, current_time):
-        task_ids, task_queues = self.get_sorted_task_ids(current_time)
+        task_types, task_queues = self.get_sorted_task_types(current_time)
         
         if self.num_free_slots > 0:
-            for task_id in task_ids:
-                first_queued_task = task_queues[task_id][0]
+            for task_type in task_types:
+                first_queued_task = task_queues[task_type][0]
                 if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp):
                     did_exec_batch, task_end_events = self.maybe_start_task_for_type(
-                        current_time, task_id, first_queued_task.max_wait_time
+                        current_time, task_type, first_queued_task.max_wait_time
                     )
                     if did_exec_batch:
                         return task_end_events
@@ -131,6 +130,8 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) ->
         
         # full batch or max wait time has passed
         if len(task_list) > 0 and self.num_free_slots > 0:
+            # print(f"[{current_time}] W{self.worker_id}: Batch of {task_list} executing")
+
             batch_end_events, task_end_time = self.batch_execute(
                 batch, current_time)
             
@@ -194,7 +195,7 @@ def batch_execute(self, tasks, current_time):
             task.log.task_execution_end_timestamp = task_end_time
 
         task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
-            self, job_ids=job_ids, task_id=tasks[0].task_id
+            self, job_ids=job_ids, task_type=tasks[0].task_type
         )))
 
         return task_end_events, task_end_time
@@ -254,24 +255,24 @@ def receive_intermediate_result(self, current_time, prev_task, cur_task) -> list
 
     def add_task_to_queue_history(self, task, current_time):
         # 0. Base case (first entry)
-        if task.task_id not in self.queue_history:
-            self.queue_history[task.task_id] = [(current_time, [task])]
+        if task.task_type not in self.queue_history:
+            self.queue_history[task.task_type] = [(current_time, [task])]
             return
 
         # 1. Find the time_stamp place to add this queue information
-        last_index = len(self.queue_history[task.task_id]) - 1
+        last_index = len(self.queue_history[task.task_type]) - 1
         while last_index >= 0:
-            if self.queue_history[task.task_id][last_index][0] == current_time:
-                if task not in self.queue_history[task.task_id][last_index][1]:
-                    self.queue_history[task.task_id][last_index][1].append(task)
+            if self.queue_history[task.task_type][last_index][0] == current_time:
+                if task not in self.queue_history[task.task_type][last_index][1]:
+                    self.queue_history[task.task_type][last_index][1].append(task)
                 break
-            if self.queue_history[task.task_id][last_index][0] < current_time:
+            if self.queue_history[task.task_type][last_index][0] < current_time:
                 # print("2")
-                if task not in self.queue_history[task.task_id][last_index][1]:
-                    next_queue = self.queue_history[task.task_id][last_index][1].copy()
+                if task not in self.queue_history[task.task_type][last_index][1]:
+                    next_queue = self.queue_history[task.task_type][last_index][1].copy()
                     next_queue.append(task)
                     last_index += 1
-                    self.queue_history[task.task_id].insert(
+                    self.queue_history[task.task_type].insert(
                         last_index, (current_time, next_queue)
                     )
                 break
@@ -279,54 +280,54 @@ def add_task_to_queue_history(self, task, current_time):
             last_index -= 1
 
         # 2. added the task to all the subsequent timestamp tuples
-        while last_index < len(self.queue_history[task.task_id]):
-            if task not in self.queue_history[task.task_id][last_index][1]:
-                self.queue_history[task.task_id][last_index][1].append(task)
+        while last_index < len(self.queue_history[task.task_type]):
+            if task not in self.queue_history[task.task_type][last_index][1]:
+                self.queue_history[task.task_type][last_index][1].append(task)
             last_index += 1
 
     def rm_task_in_queue_history(self, task, current_time):
         # 0. base case: shouldn't happen
-        if task.task_id not in self.queue_history:
+        if task.task_type not in self.queue_history:
             AssertionError("rm model cached location to an empty list")
             return
 
-        last_index = len(self.queue_history[task.task_id]) - 1
+        last_index = len(self.queue_history[task.task_type]) - 1
         
         # 1. find the place to add this remove_event to the tuple list
         while last_index >= 0:
-            if self.queue_history[task.task_id][last_index][0] == current_time:
-                if task in self.queue_history[task.task_id][last_index][1]:
-                    self.queue_history[task.task_id][last_index][1].remove(task)
+            if self.queue_history[task.task_type][last_index][0] == current_time:
+                if task in self.queue_history[task.task_type][last_index][1]:
+                    self.queue_history[task.task_type][last_index][1].remove(task)
                 break
-            if self.queue_history[task.task_id][last_index][0] < current_time:
-                if task in self.queue_history[task.task_id][last_index][1]:
-                    next_tasks_in_queue = self.queue_history[task.task_id][last_index][1].copy()
+            if self.queue_history[task.task_type][last_index][0] < current_time:
+                if task in self.queue_history[task.task_type][last_index][1]:
+                    next_tasks_in_queue = self.queue_history[task.task_type][last_index][1].copy()
                     next_tasks_in_queue.remove(task)
                     last_index = last_index + 1
-                    self.queue_history[task.task_id].insert(
+                    self.queue_history[task.task_type].insert(
                         last_index, (current_time, next_tasks_in_queue)
                     )
                 break
             last_index -= 1  # go to prev time
         # 2. remove the task from all the subsequent tuple
-        while last_index < len(self.queue_history[task.task_id]):
-            if task in self.queue_history[task.task_id][last_index]:
-                self.queue_history[task.task_id][last_index][1].remove(task)
+        while last_index < len(self.queue_history[task.task_type]):
+            if task in self.queue_history[task.task_type][last_index]:
+                self.queue_history[task.task_type][last_index][1].remove(task)
             last_index += 1  # do this for the remaining element after
 
-    def get_queue_history(self, current_time, task_id, info_staleness=0) -> list:
-        return self.get_history(self.queue_history[task_id], current_time, info_staleness)
+    def get_queue_history(self, current_time, task_type, info_staleness=0) -> list:
+        return self.get_history(self.queue_history[task_type], current_time, info_staleness)
 
-    def get_task_queue_waittime(self, current_time, task_id, info_staleness=0, requiring_worker_id=None):
+    def get_task_queue_waittime(self, current_time, task_type, info_staleness=0, requiring_worker_id=None):
         if requiring_worker_id != None and requiring_worker_id != self.worker_id:
             info_staleness = 0
 
-        task_ids, task_queues = self.get_sorted_task_ids(current_time)
+        task_types, task_queues = self.get_sorted_task_types(current_time, info_staleness=info_staleness)
 
         wait_time = 0
-        for queued_task_id in task_ids:
-            for task in task_queues[queued_task_id]:
+        for queued_task_type in task_types:
+            for task in task_queues[queued_task_type]:
                 wait_time += task.task_exec_duration
-            if queued_task_id == task_id:
+            if queued_task_type == task_type:
                 return wait_time
         return wait_time

From a1449ec495fbc079bf0cf17d560e55b4ffe80db0 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 14:59:24 -0400
Subject: [PATCH 14/41] task type fix

---
 schedulers/algo/nav_heft_algo.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py
index e87656e..1f2e2e3 100644
--- a/schedulers/algo/nav_heft_algo.py
+++ b/schedulers/algo/nav_heft_algo.py
@@ -83,14 +83,14 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
         workers[worker.worker_id] = worker
     sorted_tasks = ranking_tasks(job)
     workers_to_select = [w.worker_id for w in worker_list]
-    workers_EAT = {}   # worker_id -> (task_id -> earliest_available_time)
+    workers_EAT = {}   # worker_id -> (task_type -> earliest_available_time)
     workers_available_memory = {}  # worker_id -> available_memory
     # 1. initialize the earliest available time and memory for each worker
     for worker_id in workers_to_select:
         workers_EAT[worker_id] = {
             task_id: current_time + (workers[worker_id].get_task_queue_waittime(
                 current_time,
-                task_id,
+                (job.job_type_id, task_id),
                 info_staleness=LOAD_INFORMATION_STALENESS,
                 requiring_worker_id=initial_worker_id) if consider_load else 0)
             for task_id in sorted_tasks
@@ -158,7 +158,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
 def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_id, allocated_worker_id) -> int:
     # 1. check assigned worker wait_time to decide if need to adjust assigned worker
     cur_wait_time = workers[allocated_worker_id].get_task_queue_waittime(current_time, \
-                                                                         task_id, \
+                                                                        (job.job_type_id, task_id), \
                                                                         info_staleness=LOAD_INFORMATION_STALENESS, \
                                                                         requiring_worker_id=local_worker_id)
     cur_task = job.tasks[task_id]
@@ -171,7 +171,7 @@ def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_i
     earliest_start_time = float('inf')
     for cur_worker in workers:
         wait_time = cur_worker.get_task_queue_waittime(current_time, \
-                                                       task_id, \
+                                                       (job.job_type_id, task_id), \
                                                        info_staleness=LOAD_INFORMATION_STALENESS, \
                                                        requiring_worker_id=local_worker_id)
         cur_earliest_start_time = current_time + wait_time

From 14b857d0568e7abf84bdc46587d038892416ac42 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 15:00:44 -0400
Subject: [PATCH 15/41] event logging fix

---
 schedulers/centralized/simulation_central.py     | 3 ++-
 schedulers/decentralized/simulation_decentral.py | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/schedulers/centralized/simulation_central.py b/schedulers/centralized/simulation_central.py
index 3754077..5f75adc 100644
--- a/schedulers/centralized/simulation_central.py
+++ b/schedulers/centralized/simulation_central.py
@@ -49,11 +49,12 @@ def run(self):
             self.event_queue.put(EventOrders(
                 external_client_id * job_create_interval, \
                 JobCreationAtExternalClient(self, external_client_id)))
+
         last_time = 0
         while self.remaining_jobs > 0:
             cur_event = self.event_queue.get()
 
-            if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time):
+            if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time):
                 self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
 
             assert cur_event.current_time >= last_time
diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py
index 0ba0f22..cc80ac5 100644
--- a/schedulers/decentralized/simulation_decentral.py
+++ b/schedulers/decentralized/simulation_decentral.py
@@ -51,7 +51,7 @@ def run(self):
         while self.remaining_jobs > 0:
             cur_event = self.event_queue.get()
 
-            if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time):
+            if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time):
                 self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
 
             assert cur_event.current_time >= last_time

From f8e40de526d7edca456bf308bebc689b6ae79067 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 20 May 2025 10:49:32 -0400
Subject: [PATCH 16/41] env vars setup script

---
 set_env.sh | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100755 set_env.sh

diff --git a/set_env.sh b/set_env.sh
new file mode 100755
index 0000000..2437e4e
--- /dev/null
+++ b/set_env.sh
@@ -0,0 +1,2 @@
+export SIMULATION_DIR=$(pwd)
+export PYTHONPATH="${PYTHONPATH}:${SIMULATION_DIR}"

From e497a17e35f1af54ee9846da67a011b39a75367f Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 18:51:36 -0400
Subject: [PATCH 17/41] dummy wf

---
 core/workflow.py | 105 +++++++++++++++++++++++------------------------
 1 file changed, 52 insertions(+), 53 deletions(-)

diff --git a/core/workflow.py b/core/workflow.py
index fc8f7a4..0a1bb50 100644
--- a/core/workflow.py
+++ b/core/workflow.py
@@ -16,9 +16,9 @@
                 "OUTPUT_SIZE": 2,            # in kB
                 "EXECUTION_TIME": 561,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 1000,       # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [561, 561, 561, 561, 561]
+                "BATCH_EXEC_TIME": [561, 673, 808, 969, 1346]
                 },
                {"MODEL_NAME": "marian",
                 "MODEL_ID": 1,
@@ -30,9 +30,9 @@
                 "OUTPUT_SIZE": 2,
                 "EXECUTION_TIME": 441,       # in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 1000,       # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [441, 441, 441, 441, 441]
+                "BATCH_EXEC_TIME": [441, 529, 687, 963, 1374]
                 },
                {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
@@ -44,11 +44,11 @@
                 "OUTPUT_SIZE": 2,
                 "EXECUTION_TIME": 778,       # in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 1000,       # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [778, 778, 778, 778, 778]
+                "BATCH_EXEC_TIME": [778, 855, 941, 1035, 1139]
                 },
-                {"MODEL_NAME": "mt5",
+               {"MODEL_NAME": "mt5",
                 "MODEL_ID": 2,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [0],
@@ -58,11 +58,11 @@
                 "OUTPUT_SIZE": 2,
                 "EXECUTION_TIME": 803,       # in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 1000,       # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [803, 803, 803, 803, 803]
+                "BATCH_EXEC_TIME": [803, 833, 871, 939, 990]
                 },
-                {"MODEL_NAME": "",
+               {"MODEL_NAME": "",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 4,
                 "PREV_TASK_INDEX": [1,2,3],
@@ -71,10 +71,10 @@
                 "INPUT_SIZE": 2,           
                 "OUTPUT_SIZE": 2,
                 "EXECUTION_TIME": 1,         # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [1, 1, 1, 1, 1]
+                "MAX_BATCH_SIZE": 64,
+                "MAX_WAIT_TIME": 500,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64],
+                "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1]
                 },
                ]
      },
@@ -93,9 +93,9 @@
                 "OUTPUT_SIZE": 2,            # in kB
                 "EXECUTION_TIME": 560,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 1000,       # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [560, 560, 560, 560, 560]
+                "BATCH_EXEC_TIME": [560, 616, 677, 745, 820]
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
@@ -106,10 +106,10 @@
                 "INPUT_SIZE": 1,
                 "OUTPUT_SIZE": 1,
                 "EXECUTION_TIME": 27,        # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [27, 27, 27, 27, 27]
+                "MAX_BATCH_SIZE": 8,
+                "MAX_WAIT_TIME": 500,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8],
+                "BATCH_EXEC_TIME": [27, 48, 89, 170]
                 }
                ]
      },
@@ -127,9 +127,9 @@
                 "OUTPUT_SIZE": 20,
                 "EXECUTION_TIME": 283,       # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 500,        # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [283, 283, 283, 283, 283]
+                "BATCH_EXEC_TIME": [283, 339, 407, 489, 590]
                 },
                {"MODEL_NAME": "NLI",
                 "MODEL_ID": 3,
@@ -140,10 +140,10 @@
                 "INPUT_SIZE": 20,            # 299×299, assuming 64 bits representation
                 "OUTPUT_SIZE": 10, 
                 "EXECUTION_TIME": 26,        # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [26, 26, 26, 26, 26]
+                "MAX_BATCH_SIZE": 2,
+                "MAX_WAIT_TIME": 100,        # ms
+                "BATCH_SIZES": [1, 2],
+                "BATCH_EXEC_TIME": [26, 48]
                 },
                {"MODEL_NAME": "txt2speech",
                 "MODEL_ID": 5,
@@ -154,12 +154,12 @@
                 "INPUT_SIZE": 20,
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 76,        # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [76, 76, 76, 76, 76]
+                "MAX_BATCH_SIZE": 32,
+                "MAX_WAIT_TIME": 100,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32],
+                "BATCH_EXEC_TIME": [76, 77, 82, 91, 106, 135]
                 },
-                {"MODEL_NAME": "aggregate",
+               {"MODEL_NAME": "aggregate",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [1,2],
@@ -168,10 +168,10 @@
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 0.2,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2]
+                "MAX_BATCH_SIZE": 64,
+                "MAX_WAIT_TIME": 100,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64],
+                "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3]
                 }
                ]
      },
@@ -188,10 +188,10 @@
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 0.6,       # avg time, in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6]
+                "MAX_BATCH_SIZE": 32,
+                "MAX_WAIT_TIME": 100,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32],
+                "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6]
                 },
                {"MODEL_NAME": "DETR",
                 "MODEL_ID": 8,
@@ -202,10 +202,10 @@
                 "INPUT_SIZE": 3000,          # 299×299, assuming 64 bits representation
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 178,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [178, 178, 178, 178, 178]
+                "MAX_BATCH_SIZE": 4,
+                "MAX_WAIT_TIME": 500,        # ms
+                "BATCH_SIZES": [1, 2, 4],
+                "BATCH_EXEC_TIME": [178, 267, 400]
                 },
                {"MODEL_NAME": "Depth",
                 "MODEL_ID": 9,
@@ -217,11 +217,11 @@
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 147,       # in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
+                "MAX_WAIT_TIME": 500,        # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [147, 147, 147, 147, 147]
+                "BATCH_EXEC_TIME": [147, 150, 155, 162, 172]
                 },
-                {"MODEL_NAME": "Aggregate",
+               {"MODEL_NAME": "Aggregate",
                 "MODEL_ID": -1,
                 "TASK_INDEX": 3,
                 "PREV_TASK_INDEX": [1,2],
@@ -230,12 +230,11 @@
                 "INPUT_SIZE": 3000,
                 "OUTPUT_SIZE": 3000,
                 "EXECUTION_TIME": 104,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 5000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [104, 104, 104, 104, 104]
+                "MAX_BATCH_SIZE": 8,
+                "MAX_WAIT_TIME": 500,        # ms
+                "BATCH_SIZES": [1, 2, 4, 8],
+                "BATCH_EXEC_TIME": [104, 130, 165, 213]
                 }
                ]
-     },
-
-]
+     }
+]
\ No newline at end of file

From 22dbc005d8a328f71c063ed664db37bb9af2c33c Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 10 Jun 2025 10:35:19 -0400
Subject: [PATCH 18/41] merge

---
 experiments/parse_results.py | 158 +++++++++++++++++++++++++++++++++++
 1 file changed, 158 insertions(+)
 create mode 100644 experiments/parse_results.py

diff --git a/experiments/parse_results.py b/experiments/parse_results.py
new file mode 100644
index 0000000..ee2ecfe
--- /dev/null
+++ b/experiments/parse_results.py
@@ -0,0 +1,158 @@
+import sys
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from core.workflow import *
+from functools import reduce
+
+
+# TODO: verify units
+def plot_response_time_vs_arrival_time(job_df, out_path):
+    """Scatter-plot job response time against job creation time, one series per workflow type.
+
+    Creation times are shifted so that the first row of job_df is time 0. The figure
+    is saved as response_vs_arrival.png inside out_path.
+    """
+    job_names = {job["JOB_TYPE"]: job["JOB_NAME"] for job in WORKFLOW_LIST}
+    # .iloc is positional, so this still works if job_df was filtered or re-indexed
+    fst_job_create_time = job_df["job_create_time"].iloc[0]
+
+    fig = plt.figure(figsize=(10, 6))
+    for jt, jobs_of_type in job_df.groupby("workflow_type"):
+        plt.scatter(
+            jobs_of_type["job_create_time"] - fst_job_create_time,
+            jobs_of_type["response_time"],
+            label=f"Workflow {jt}: {job_names[jt]}",
+            s=4
+        )
+
+    plt.xlabel("Job arrival time (ms since start)")
+    plt.ylabel("Response time (ms)")
+    plt.title("Response Time vs. Arrival Time by Job Type")
+    plt.legend()
+    plt.savefig(os.path.join(out_path, "response_vs_arrival.png"))
+    plt.close(fig)  # pyplot keeps figures alive until they are closed
+
+
+def plot_batch_size_vs_batch_start(event_df, out_path):
+    """Plot batch size against batch start time, saving one PNG per (workflow, task) pair.
+
+    Batch size is the number of job ids in the "Jobs ..." part of a "Batch Start" event.
+    """
+    batch_start_events = event_df[event_df["event"].str.contains("Batch Start")]
+    # dropna below: an event without a "Task (w, t)" tag extracts to NaN and cannot be parsed
+    task_tags = batch_start_events["event"].str.extract(r"Task \(([0-9]+, [0-9]+)\)")[0]
+
+    for task_type in sorted(set(task_tags.dropna())):
+        workflow_id, task_id = (int(item) for item in task_type.split(", "))
+        workflow = next(job for job in WORKFLOW_LIST if job["JOB_TYPE"] == workflow_id)
+        task_cfg = next(task for task in workflow["TASKS"] if task["TASK_INDEX"] == task_id)
+
+        # regex=False matches the tag literally, so the parentheses need no escaping
+        events_for_type = batch_start_events[
+            batch_start_events["event"].str.contains(f"Task ({task_type})", regex=False)]
+        batch_sizes = events_for_type["event"].str.extract(r"Jobs ([0-9|,]+)")[0].str.count(r"[0-9]+")
+        fig = plt.figure(figsize=(10, 6))
+        plt.scatter(
+            events_for_type["time"],
+            batch_sizes,
+            label=f"Workflow {workflow_id}, Task ID {task_id}: Model {task_cfg['MODEL_NAME']}",
+            s=4
+        )
+        plt.xlabel("Batch exec start time (ms since start)")
+        plt.ylabel("Batch size")
+        plt.title("Batch Size vs. Time by Model")
+        plt.legend()
+        plt.savefig(os.path.join(out_path, f"wf_{workflow_id}_task_{task_id}_batch_size_vs_time.png"))
+        plt.close(fig)  # one figure per task type; close it so they do not pile up
+
+
+def plot_batch_size_bar_chart(event_df, out_path):
+    """Save one bar chart per (workflow, task) pair counting batches of each configured size.
+
+    Batch size is the number of job ids in the "Jobs ..." part of a "Batch Start" event;
+    only the sizes listed in the task's BATCH_SIZES get a bar.
+    """
+    batch_start_events = event_df[event_df["event"].str.contains("Batch Start")]
+    # dropna below: an event without a "Task (w, t)" tag extracts to NaN and cannot be parsed
+    task_tags = batch_start_events["event"].str.extract(r"Task \(([0-9]+, [0-9]+)\)")[0]
+
+    for task_type in sorted(set(task_tags.dropna())):
+        workflow_id, task_id = (int(item) for item in task_type.split(", "))
+        workflow = next(job for job in WORKFLOW_LIST if job["JOB_TYPE"] == workflow_id)
+        task_cfg = next(task for task in workflow["TASKS"] if task["TASK_INDEX"] == task_id)
+        allowed_sizes = task_cfg["BATCH_SIZES"]
+
+        # regex=False matches the tag literally, so the parentheses need no escaping
+        events_for_type = batch_start_events[
+            batch_start_events["event"].str.contains(f"Task ({task_type})", regex=False)]
+        batch_size_events = events_for_type["event"].str.extract(r"Jobs ([0-9|,]+)")[0].str.count(r"[0-9]+")
+        batch_size_counts = [(batch_size_events == size).sum() for size in allowed_sizes]
+
+        fig = plt.figure(figsize=(8, 6))
+        plt.bar(range(len(allowed_sizes)), batch_size_counts)
+        plt.xticks(range(len(allowed_sizes)), allowed_sizes)
+        plt.xlabel("Batch sizes")
+        plt.ylabel("Number of batches")
+        # single quotes inside the f-string: reusing " there is a SyntaxError before Python 3.12
+        plt.title(f"Batch size distribution for {task_cfg['MODEL_NAME']} Model")
+        plt.savefig(os.path.join(out_path, f"wf_{workflow_id}_task_{task_id}_batch_size_dist.png"))
+        plt.close(fig)  # one figure per task type; close it so they do not pile up
+
+
+def gen_per_task_stats(task_df, out_path):
+    """Write mean/median/p99 latency statistics per (workflow, task) to per_task_avgs.csv.
+
+    Args:
+        task_df: per-task rows with workflow_type, task_id, task_start_exec_time,
+            task_arrival_time, dependency_wait_time, time_spent_in_queue and
+            model_fetching_time columns.
+        out_path: directory that receives per_task_avgs.csv.
+
+    Rows are written in sorted (workflow_type, task_id) order so reruns are comparable.
+    """
+    task_stat_types = ["arrival_at_worker_to_exec_start_time", "arrival_at_worker_to_enqueue_time",
+                       "enqueue_to_exec_start_time", "model_fetching_time"]
+    # column order: mean_, median_, p99_ for each statistic, in task_stat_types order
+    stat_columns = [f"{agg}_{stat}" for stat in task_stat_types for agg in ("mean", "median", "p99")]
+
+    rows = []
+    for (jt, task_type), task_set in task_df.groupby(["workflow_type", "task_id"]):
+        task_stat_data = {
+            "arrival_at_worker_to_exec_start_time": task_set["task_start_exec_time"] - task_set["task_arrival_time"],
+            "arrival_at_worker_to_enqueue_time": task_set["dependency_wait_time"],
+            "enqueue_to_exec_start_time": task_set["time_spent_in_queue"],
+            "model_fetching_time": task_set["model_fetching_time"]
+        }
+
+        row = {"job_type": jt, "task_type": task_type}
+        for stat in task_stat_types:
+            row[f"mean_{stat}"] = task_stat_data[stat].mean()
+            row[f"median_{stat}"] = task_stat_data[stat].median()
+            row[f"p99_{stat}"] = task_stat_data[stat].quantile(0.99)
+        rows.append(row)
+
+    # Build the frame in one go: enlarging it row by row with .loc re-allocates on
+    # every new row and leaves every column with object dtype.
+    task_stat_df = pd.DataFrame(rows, columns=["job_type", "task_type"] + stat_columns)
+    task_stat_df.to_csv(os.path.join(out_path, "per_task_avgs.csv"))
+    
+
+def gen_stats(job_df, event_df):
+    """Print mean and max job response time. TODO: throughput (jobs / last event time) is not implemented, so event_df is unused."""
+    print(f"Mean response time: {job_df['response_time'].mean()}, Max: {job_df['response_time'].max()}")
+    
+
+# Usage: parse_results.py <results_dir> [out_dir] -- writes the plots for one results/<scheduler_type> run.
+if len(sys.argv) < 2:
+    sys.exit(f"usage: {sys.argv[0]} <results_dir> [out_dir]")
+results_dir_path = sys.argv[1] # results/<scheduler_type>
+out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results"
+os.makedirs(out_path, exist_ok=True)
+job_df = pd.read_csv(os.path.join(results_dir_path, "job_breakdown.csv"))
+# task_df = pd.read_csv(os.path.join(results_dir_path, "loadDelay_1_placementDelay_1.csv"))
+events_df = pd.read_csv(os.path.join(results_dir_path, 'events_by_time.csv'))
+plot_batch_size_bar_chart(events_df, out_path)
+plot_batch_size_vs_batch_start(events_df, out_path)
+plot_response_time_vs_arrival_time(job_df, out_path)
\ No newline at end of file

From 6243924eceaa9b5dd456d664cec83f0dc006d1ae Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 10 Jun 2025 10:55:56 -0400
Subject: [PATCH 19/41] real workflow

---
 core/config.py   |  10 +-
 core/workflow.py | 302 +++++++++++++++++------------------------------
 2 files changed, 115 insertions(+), 197 deletions(-)

diff --git a/core/config.py b/core/config.py
index d641630..c088dd1 100644
--- a/core/config.py
+++ b/core/config.py
@@ -1,14 +1,14 @@
 """ --------      Worker Machines Parameters      -------- """
-GPU_MEMORY_SIZE = 14000000  # in KB, 15BG for Tesla T4
+GPU_MEMORY_SIZE = 24000000  # in KB, 24GB for NVIDIA A30
 
-TOTAL_NUM_OF_WORKERS = 140
+TOTAL_NUM_OF_WORKERS = 50
 
 
 """  --------       Workload Parameters    --------  """
-TOTAL_NUM_OF_JOBS = 1000
+TOTAL_NUM_OF_JOBS = 10000
 
 # The interval between two consecutive job creation events at each external client 
-DEFAULT_CREATION_INTERVAL_PERCLIENT = 100     # ms. 
+DEFAULT_CREATION_INTERVAL_PERCLIENT = 0.2     # ms.
 
 WORKLOAD_DISTRIBUTION = "POISON"  # UNIFORM | POISON | GAMMA
 
@@ -20,4 +20,4 @@
 
 PLACEMENT_INFORMATION_STALENESS = 1  # in ms
 
-RESCHEDULE_THREASHOLD = 1.5
\ No newline at end of file
+RESCHEDULE_THREASHOLD = 1.5
diff --git a/core/workflow.py b/core/workflow.py
index 0a1bb50..2f470a0 100644
--- a/core/workflow.py
+++ b/core/workflow.py
@@ -3,238 +3,156 @@
 
 WORKFLOW_LIST = [
     {"JOB_TYPE": 0,         # ID of the type of workflow (dependency graph)
-     "JOB_NAME": "translation",
+     "JOB_NAME": "textvision",
      # the minimum amount of time necessary to execute the whole job
-     "BEST_EXEC_TIME": 1365,
-     "TASKS": [{"MODEL_NAME": "OPT",
-                "MODEL_ID": 0,
+     "BEST_EXEC_TIME": 51.7,
+     "TASKS": [{"MODEL_NAME": "",
+                "MODEL_ID": -1,
                 "TASK_INDEX": 0,
                 "PREV_TASK_INDEX": [],
-                "NEXT_TASK_INDEX": [1,2,3],
-                "MODEL_SIZE": 5720000,       # in kB
+                "NEXT_TASK_INDEX": [1, 2],
+                "MODEL_SIZE": 0,             # in KB
                 "INPUT_SIZE": 1,
-                "OUTPUT_SIZE": 2,            # in kB
-                "EXECUTION_TIME": 561,       # avg time, in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 1000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [561, 673, 808, 969, 1346]
+                "OUTPUT_SIZE": 1,
+                "EXECUTION_TIME": 1,         # in ms
+                "MAX_BATCH_SIZE": 128,
+                "MAX_WAIT_TIME": 1,         # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64, 128],
+                "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1, 1]
                 },
-               {"MODEL_NAME": "marian",
-                "MODEL_ID": 1,
+               {"MODEL_NAME": "text_encoder",
+                "MODEL_ID": 0,
                 "TASK_INDEX": 1,
                 "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 800000,        # in kB
-                "INPUT_SIZE": 2,           
-                "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 441,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 1000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [441, 529, 687, 963, 1374]
+                "NEXT_TASK_INDEX": [3],
+                "MODEL_SIZE": 5677000,       # in kB
+                "INPUT_SIZE": 1,
+                "OUTPUT_SIZE": 2,            # in kB
+                "EXECUTION_TIME": 10,        # avg time, in ms
+                "MAX_BATCH_SIZE": 128,
+                "MAX_WAIT_TIME": 1,         # ms
+                "BATCH_SIZES": [1, 4, 8, 16, 32, 64, 128],
+                "BATCH_EXEC_TIME": [10, 10, 11, 12, 15, 20, 31]
                 },
-               {"MODEL_NAME": "mt5",
-                "MODEL_ID": 2,
+               {"MODEL_NAME": "vision_encoder",
+                "MODEL_ID": 1,
                 "TASK_INDEX": 2,
                 "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 2000000,       # in KB
-                "INPUT_SIZE": 2,           
-                "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 778,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 1000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [778, 855, 941, 1035, 1139]
+                "NEXT_TASK_INDEX": [3],
+                "MODEL_SIZE": 11655000,      # in kB
+                "INPUT_SIZE": 10000,
+                "OUTPUT_SIZE": 100,
+                "EXECUTION_TIME": 31,        # in ms
+                "MAX_BATCH_SIZE": 8,
+                "MAX_WAIT_TIME": 1,        # ms
+                "BATCH_SIZES": [1, 4, 8],
+                "BATCH_EXEC_TIME": [31, 98, 183]
                 },
-               {"MODEL_NAME": "mt5",
+               {"MODEL_NAME": "flmr",
                 "MODEL_ID": 2,
                 "TASK_INDEX": 3,
-                "PREV_TASK_INDEX": [0],
+                "PREV_TASK_INDEX": [1,2],
                 "NEXT_TASK_INDEX": [4],
-                "MODEL_SIZE": 2000000,       # in KB
-                "INPUT_SIZE": 2,           
-                "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 803,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 1000,       # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [803, 833, 871, 939, 990]
+                "MODEL_SIZE": 854000,        # in KB
+                "INPUT_SIZE": 102,
+                "OUTPUT_SIZE": 5,
+                "EXECUTION_TIME": 1.7,       # in ms
+                "MAX_BATCH_SIZE": 32,
+                "MAX_WAIT_TIME": 1,         # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32],
+                "BATCH_EXEC_TIME": [1.7, 1.9, 1.9, 2, 2.6, 3.1]
                 },
-               {"MODEL_NAME": "",
-                "MODEL_ID": -1,
+               {"MODEL_NAME": "search",
+                "MODEL_ID": 3,
                 "TASK_INDEX": 4,
-                "PREV_TASK_INDEX": [1,2,3],
+                "PREV_TASK_INDEX": [3],
                 "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": 0,             # in KB
-                "INPUT_SIZE": 2,           
+                "MODEL_SIZE": 777000,        # in KB
+                "INPUT_SIZE": 5,
                 "OUTPUT_SIZE": 2,
-                "EXECUTION_TIME": 1,         # in ms
-                "MAX_BATCH_SIZE": 64,
-                "MAX_WAIT_TIME": 500,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64],
-                "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1]
-                },
+                "EXECUTION_TIME": 18,        # in ms
+                "MAX_BATCH_SIZE": 16,
+                "MAX_WAIT_TIME": 1,        # ms
+                "BATCH_SIZES": [1, 4, 8, 16],
+                "BATCH_EXEC_TIME": [18, 64, 114, 209]
+                }
                ]
      },
 
     {"JOB_TYPE": 1,
-     "JOB_NAME": "question_answer",
+     "JOB_NAME": "tts",
      # the minimum amount of time necessary to execute the whole job
-     "BEST_EXEC_TIME": 587,
-     "TASKS": [{"MODEL_NAME": "OPT",
-                "MODEL_ID": 0,
+     "BEST_EXEC_TIME": 308.4,
+     "TASKS": [{"MODEL_NAME": "audio_det",
+                "MODEL_ID": 4,
                 "TASK_INDEX": 0,
                 "PREV_TASK_INDEX": [],
                 "NEXT_TASK_INDEX": [1],
-                "MODEL_SIZE": 5720000,       # in kB
-                "INPUT_SIZE": 1,
+                "MODEL_SIZE": 10525000,      # in kB
+                "INPUT_SIZE": 10000,
                 "OUTPUT_SIZE": 2,            # in kB
-                "EXECUTION_TIME": 560,       # avg time, in ms
+                "EXECUTION_TIME": 66,        # avg time, in ms
                 "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 1000,       # ms
+                "MAX_WAIT_TIME": 1,        # ms
                 "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [560, 616, 677, 745, 820]
+                "BATCH_EXEC_TIME": [66, 68, 70, 76, 127]
                 },
-               {"MODEL_NAME": "NLI",
-                "MODEL_ID": 3,
-                "TASK_INDEX": 1,
-                "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": 2140000,       # in kB
-                "INPUT_SIZE": 1,
-                "OUTPUT_SIZE": 1,
-                "EXECUTION_TIME": 27,        # in ms
-                "MAX_BATCH_SIZE": 8,
-                "MAX_WAIT_TIME": 500,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8],
-                "BATCH_EXEC_TIME": [27, 48, 89, 170]
-                }
-               ]
-     },
-
-    {"JOB_TYPE": 2,  # ID of the type of workflow (dependency graph)
-     "JOB_NAME": "img_to_sound",
-     "BEST_EXEC_TIME": 359.2,
-     "TASKS": [{"MODEL_NAME": "vit",
-                "MODEL_ID": 4,
-                "TASK_INDEX": 0,
-                "PREV_TASK_INDEX": [],
-                "NEXT_TASK_INDEX": [1,2],
-                "MODEL_SIZE": 1700000,       # in kB
-                "INPUT_SIZE": 3000,          # 224 x 224 x 3 shape, assuming 64 bits representation
-                "OUTPUT_SIZE": 20,
-                "EXECUTION_TIME": 283,       # avg time, in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 500,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [283, 339, 407, 489, 590]
-                },
-               {"MODEL_NAME": "NLI",
-                "MODEL_ID": 3,
+               {"MODEL_NAME": "text_encoder_2",
+                "MODEL_ID": 5,
                 "TASK_INDEX": 1,
                 "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 2140000,       # in kB
-                "INPUT_SIZE": 20,            # 299×299, assuming 64 bits representation
-                "OUTPUT_SIZE": 10, 
-                "EXECUTION_TIME": 26,        # in ms
-                "MAX_BATCH_SIZE": 2,
-                "MAX_WAIT_TIME": 100,        # ms
-                "BATCH_SIZES": [1, 2],
-                "BATCH_EXEC_TIME": [26, 48]
+                "NEXT_TASK_INDEX": [2],
+                "MODEL_SIZE": 427000,        # in kB
+                "INPUT_SIZE": 2,
+                "OUTPUT_SIZE": 4,
+                "EXECUTION_TIME": 17,        # in ms
+                "MAX_BATCH_SIZE": 64,
+                "MAX_WAIT_TIME": 1,         # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64],
+                "BATCH_EXEC_TIME": [17, 18, 18, 19, 19, 20, 22]
                 },
-               {"MODEL_NAME": "txt2speech",
-                "MODEL_ID": 5,
+               {"MODEL_NAME": "faiss_search",
+                "MODEL_ID": 6,
                 "TASK_INDEX": 2,
-                "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 2700000,       # in kB
-                "INPUT_SIZE": 20,
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 76,        # in ms
-                "MAX_BATCH_SIZE": 32,
-                "MAX_WAIT_TIME": 100,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16, 32],
-                "BATCH_EXEC_TIME": [76, 77, 82, 91, 106, 135]
+                "PREV_TASK_INDEX": [1],
+                "NEXT_TASK_INDEX": [3,4],
+                "MODEL_SIZE": 783000,        # in kB
+                "INPUT_SIZE": 4,
+                "OUTPUT_SIZE": 2,
+                "EXECUTION_TIME": 0.4,       # in ms
+                "MAX_BATCH_SIZE": 256,
+                "MAX_WAIT_TIME": 1,         # ms
+                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64, 128, 256],
+                "BATCH_EXEC_TIME": [0.4, 0.4, 0.4, 0.5, 0.5, 0.6, 0.8, 1.1, 1.6]
                 },
-               {"MODEL_NAME": "aggregate",
-                "MODEL_ID": -1,
+               {"MODEL_NAME": "text_check",
+                "MODEL_ID": 7,
                 "TASK_INDEX": 3,
-                "PREV_TASK_INDEX": [1,2],
-                "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": -1,            # in kB
-                "INPUT_SIZE": 3000,
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 0.2,       # in ms
-                "MAX_BATCH_SIZE": 64,
-                "MAX_WAIT_TIME": 100,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64],
-                "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3]
-                }
-               ]
-     },
-
-    {"JOB_TYPE": 3,  # ID of the type of workflow (dependency graph)
-     "JOB_NAME": "ImageObjDetect",
-     "BEST_EXEC_TIME": 282.6,
-     "TASKS": [{"MODEL_NAME": "entry",
-                "MODEL_ID": -1,
-                "TASK_INDEX": 0,
-                "PREV_TASK_INDEX": [],
-                "NEXT_TASK_INDEX": [1,2],
-                "MODEL_SIZE": -1,            # in kB
-                "INPUT_SIZE": 3000,
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 0.6,       # avg time, in ms
-                "MAX_BATCH_SIZE": 32,
-                "MAX_WAIT_TIME": 100,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16, 32],
-                "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6]
-                },
-               {"MODEL_NAME": "DETR",
-                "MODEL_ID": 8,
-                "TASK_INDEX": 1,
-                "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 1800000,       # in kB
-                "INPUT_SIZE": 3000,          # 299×299, assuming 64 bits representation
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 178,       # in ms
+                "PREV_TASK_INDEX": [2],
+                "NEXT_TASK_INDEX": [4],
+                "MODEL_SIZE": 7383000,       # in kB
+                "INPUT_SIZE": 2,
+                "OUTPUT_SIZE": 2,
+                "EXECUTION_TIME": 17,        # in ms
                 "MAX_BATCH_SIZE": 4,
-                "MAX_WAIT_TIME": 500,        # ms
+                "MAX_WAIT_TIME": 1,         # ms
                 "BATCH_SIZES": [1, 2, 4],
-                "BATCH_EXEC_TIME": [178, 267, 400]
-                },
-               {"MODEL_NAME": "Depth",
-                "MODEL_ID": 9,
-                "TASK_INDEX": 2,
-                "PREV_TASK_INDEX": [0],
-                "NEXT_TASK_INDEX": [3],
-                "MODEL_SIZE": 3900000,       # in kB
-                "INPUT_SIZE": 3000,
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 147,       # in ms
-                "MAX_BATCH_SIZE": 16,
-                "MAX_WAIT_TIME": 500,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8, 16],
-                "BATCH_EXEC_TIME": [147, 150, 155, 162, 172]
+                "BATCH_EXEC_TIME": [17, 25, 45]
                 },
-               {"MODEL_NAME": "Aggregate",
-                "MODEL_ID": -1,
-                "TASK_INDEX": 3,
-                "PREV_TASK_INDEX": [1,2],
+               {"MODEL_NAME": "text_to_speech",
+                "MODEL_ID": 8,
+                "TASK_INDEX": 4,
+                "PREV_TASK_INDEX": [2,3],
                 "NEXT_TASK_INDEX": [],
-                "MODEL_SIZE": -1,            # in kB
-                "INPUT_SIZE": 3000,
-                "OUTPUT_SIZE": 3000,
-                "EXECUTION_TIME": 104,       # in ms
-                "MAX_BATCH_SIZE": 8,
-                "MAX_WAIT_TIME": 500,        # ms
-                "BATCH_SIZES": [1, 2, 4, 8],
-                "BATCH_EXEC_TIME": [104, 130, 165, 213]
+                "MODEL_SIZE": 783000,        # in kB
+                "INPUT_SIZE": 4,
+                "OUTPUT_SIZE": 10000,
+                "EXECUTION_TIME": 208,       # in ms
+                "MAX_BATCH_SIZE": 1,
+                "MAX_WAIT_TIME": 1,        # ms
+                "BATCH_SIZES": [1],
+                "BATCH_EXEC_TIME": [208]
                 }
                ]
      }
-]
\ No newline at end of file
+]

From 05c3c8800639b38cdec86718d79c950e45ae1cc4 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 10 Jun 2025 16:58:39 -0400
Subject: [PATCH 20/41] lookahead model eviction

---
 workers/worker.py | 83 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 80 insertions(+), 3 deletions(-)

diff --git a/workers/worker.py b/workers/worker.py
index 88cb292..a1f71e9 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -12,6 +12,7 @@ def __init__(self, simulation, num_free_slots, worker_id):
         self.worker_id = worker_id
         self.simulation = simulation
         self.num_free_slots = num_free_slots
+        self.current_batch = [] # track the currently executing batch (if any)
         self.GPU_memory_models = []
         # Keep track of the list of models sitting in GPU memory at time: 
         # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...]
@@ -56,6 +57,26 @@ def used_GPUmemory(self, current_time, info_staleness=0, requiring_worker_id=Non
         return sum(m.model_size for m in models)
 
     #  ----------  LOCAL MEMORY MANAGEMENT AND RETRIEVE  ----------"""
+    def does_have_model(self, model, current_time: float, info_staleness=0) -> bool:
+        w_models = self.get_model_history(current_time, info_staleness)
+        return model in w_models
+    
+    def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool:
+        # if currently available memory >= min_required_memory
+        used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness)
+        if GPU_MEMORY_SIZE - used_memory >= min_required_memory:
+            return True
+        
+        # if not executing any batches or executing a batch with no model,
+        # existing models can be evicted to make space
+        if (self.current_batch == [] or self.current_batch[0].model == None) and \
+            min_required_memory <= GPU_MEMORY_SIZE:
+            return True
+        
+        # if evicting all except current batch's required model can make enough space
+        if GPU_MEMORY_SIZE - self.current_batch[0].model.model_size >= min_required_memory:
+            return True
+
     def fetch_model(self, model, current_time):
         """
         Return: model transfer time required to execute the Task
@@ -67,9 +88,8 @@ def fetch_model(self, model, current_time):
         if model is None:
             return 0
         # First check if the model is stored locally: either on GPU, or systemRAM(home node)
-        w_models = self.get_model_history(current_time, info_staleness=0)
         # case1: if it is in local GPU already
-        if model in w_models:
+        if self.does_have_model(model, current_time):
             return 0
         fetch_time = 0
         fetch_time = SameMachineCPUtoGPU_delay(model.model_size)
@@ -78,6 +98,63 @@ def fetch_model(self, model, current_time):
         self.add_model_to_memory_history(model, current_time + fetch_time)
         eviction_time = self.evict_model_from_GPU(current_time + fetch_time)
         return fetch_time + eviction_time
+    
+    # Required to be overriden
+    def get_next_tasks(self, lookahead_count: int, current_time: float, info_staleness=0):
+        """
+            Returns a list of up to lookahead_count tasks in order of when they are
+            expected to begin execution on the worker.
+        """
+        return []
+
+
+    def _evict_models_from_GPU(self, models_to_evict, current_time):
+        eviction_duration = 0
+        required_current_model = self.current_batch[0].model if self.current_batch else None
+        for model in models_to_evict:
+            if model != required_current_model:
+                self.simulation.metadata_service.rm_model_cached_location(
+                    model, self.worker_id, current_time)
+                self.rm_model_in_memory_history(model, current_time)
+                eviction_duration += SameMachineGPUtoCPU_delay(model.model_size)
+        return eviction_duration
+
+
+    def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int) -> float:
+        """
+            Evicts models from GPU according to lookahead eviction policy until at least
+            min_required_memory space is available. Returns time taken to execute model
+            evictions. 0 if min_required_memory could not be created.
+        """
+        if not self.can_fit(min_required_memory, current_time):
+            return 0
+        
+        curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time)
+       
+        models_in_GPU = self.get_model_history(current_time, info_staleness=0)
+        required_current_model = self.current_batch[0].model if self.current_batch else None
+        next_models = set(map(lambda task: task.model, self.get_next_tasks(3)))
+
+        models_to_evict = []
+
+        for i, model in enumerate(models_in_GPU):
+            # lowest priority models
+            if model not in next_models and model != required_current_model:
+                curr_memory -= model.model_size
+                models_to_evict.append(model)
+                if curr_memory >= min_required_memory:
+                    return self._evict_models_from_GPU(models_to_evict)
+
+        # next look at future models from latest -> earliest to be used
+        for model in next_models[::-1]:
+            if model in models_in_GPU and model != required_current_model:
+                curr_memory -= model.model_size
+                models_to_evict.append(model)
+                if curr_memory >= min_required_memory:
+                    return self._evict_models_from_GPU(models_to_evict)
+        
+        return 0
+    
 
     def evict_model_from_GPU(self, current_time):
         """
@@ -92,7 +169,7 @@ def evict_model_from_GPU(self, current_time):
             models_total_size += model.model_size
         eviction_index = 0
         eviction_duration = 0
-        while(models_total_size > GPU_MEMORY_SIZE):
+        while (models_total_size > GPU_MEMORY_SIZE):
             rm_model = models_in_GPU[eviction_index]
             self.simulation.metadata_service.rm_model_cached_location(
                 rm_model, self.worker_id, current_time)

From f9b8408cbda1936112d4cb37edf5b04008867d20 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 11 Jun 2025 15:50:13 -0400
Subject: [PATCH 21/41] eviction policy fix

---
 workers/worker.py | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/workers/worker.py b/workers/worker.py
index a1f71e9..92ed859 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -17,7 +17,7 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # Keep track of the list of models sitting in GPU memory at time: 
         # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...]
         self.GPU_memory_models_history = []
-        
+        self.models_in_use = []
 
     def __hash__(self):
         return hash(self.worker_id)
@@ -61,6 +61,10 @@ def does_have_model(self, model, current_time: float, info_staleness=0) -> bool:
         w_models = self.get_model_history(current_time, info_staleness)
         return model in w_models
     
+    def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int:
+        w_models = self.get_model_history(current_time, info_staleness)
+        return w_models.count(model)
+
     def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool:
         # if currently available memory >= min_required_memory
         used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness)
@@ -99,20 +103,18 @@ def fetch_model(self, model, current_time):
         eviction_time = self.evict_model_from_GPU(current_time + fetch_time)
         return fetch_time + eviction_time
     
-    # Required to be overriden
-    def get_next_tasks(self, lookahead_count: int, current_time: float, info_staleness=0):
+    # NOTE: REQUIRED OVERRIDE
+    def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0):
         """
-            Returns a list of up to lookahead_count tasks in order of when they are
-            expected to begin execution on the worker.
+            Returns a list of up to lookahead_count models in order of when they are
+            expected to be executed.
         """
         return []
 
-
     def _evict_models_from_GPU(self, models_to_evict, current_time):
         eviction_duration = 0
-        required_current_model = self.current_batch[0].model if self.current_batch else None
         for model in models_to_evict:
-            if model != required_current_model:
+            if model not in self.models_in_use:
                 self.simulation.metadata_service.rm_model_cached_location(
                     model, self.worker_id, current_time)
                 self.rm_model_in_memory_history(model, current_time)
@@ -125,6 +127,7 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
             Evicts models from GPU according to lookahead eviction policy until at least
             min_required_memory space is available. Returns time taken to execute model
             evictions. 0 if min_required_memory could not be created.
+            Assumes batches run in first task arrival order.
         """
         if not self.can_fit(min_required_memory, current_time):
             return 0
@@ -132,26 +135,20 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
         curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time)
        
         models_in_GPU = self.get_model_history(current_time, info_staleness=0)
-        required_current_model = self.current_batch[0].model if self.current_batch else None
-        next_models = set(map(lambda task: task.model, self.get_next_tasks(3)))
+        next_models = self.get_next_models(3, current_time)
+        models_in_GPU_sorted = sorted(
+            models_in_GPU, 
+            key=lambda m: next_models.index(m) if m in next_models else len(next_models),
+            reverse=True
+        )
 
         models_to_evict = []
-
-        for i, model in enumerate(models_in_GPU):
-            # lowest priority models
-            if model not in next_models and model != required_current_model:
-                curr_memory -= model.model_size
-                models_to_evict.append(model)
-                if curr_memory >= min_required_memory:
-                    return self._evict_models_from_GPU(models_to_evict)
-
-        # next look at future models from latest -> earliest to be used
-        for model in next_models[::-1]:
-            if model in models_in_GPU and model != required_current_model:
+        for model in models_in_GPU_sorted:
+            if model not in self.models_in_use:
                 curr_memory -= model.model_size
                 models_to_evict.append(model)
                 if curr_memory >= min_required_memory:
-                    return self._evict_models_from_GPU(models_to_evict)
+                    return self._evict_models_from_GPU(models_to_evict, current_time)
         
         return 0
     

From ae44ac7d5c4b1b2022b2c5a98823095cd557d541 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 11 Jun 2025 15:50:45 -0400
Subject: [PATCH 22/41] concurrent batch execution according to available GPU
 memory

---
 workers/taskworker.py | 173 +++++++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 78 deletions(-)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index b9211d2..02d6df9 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -20,18 +20,31 @@ def add_task(self, current_time, task):
         """
         Add task into the local task queue
         """
-
-        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
-
-        # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
-        self.add_task_to_queue_history(task, current_time)
-        _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time)
-        return task_end_events
-
-    def free_slot(self, current_time):
+        self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker
+        return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time)
+    
+    def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0):
+        if lookahead_count <= 0:
+            return []
+        
+        next_models = []
+        task_types_by_arrival, task_queues = self.get_sorted_task_types(current_time)
+        for task_type in task_types_by_arrival:
+            next_model = task_queues[task_type][0].model
+            if next_model != None and next_model not in next_models:
+                next_models.append(next_model)
+            if len(next_models) == lookahead_count:
+                return next_models
+
+        return next_models
+
+    def free_slot(self, current_time, model):
         """ Frees a slot on the worker and attempts to launch another task in that slot. """
-        self.num_free_slots += 1
+        # self.num_free_slots += 1
+        if model != None:
+            self.models_in_use.remove(model)
+        
         get_task_events = self.maybe_start_task_any(current_time)
         return get_task_events
 
@@ -68,7 +81,29 @@ def schedule_job_heft(self, current_time, job):
 
     #  ---------------------------  TASK EXECUTION  ----------------------
 
-    def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(int, int)], dict[(int, int), list[Task]]]:
+    _CAN_RUN_NOW = 0
+    _CAN_RUN_ON_LOAD = 1
+    _CANNOT_RUN = 2
+
+    def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> int:
+        """
+            Returns _CAN_RUN_NOW if model is None, or if a copy of model is on the GPU and not currently in use.
+            Returns _CAN_RUN_ON_LOAD if model can be loaded onto the GPU (either by evicting 
+            existing models not in use or simply using available memory).
+            Returns _CANNOT_RUN otherwise.
+        """
+        if model == None: # doesn't use GPU
+            return self._CAN_RUN_NOW
+        # has >= 1 copies of model in memory that are not currently in use
+        elif self.does_have_model(model, current_time, info_staleness) and \
+             self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
+                return self._CAN_RUN_NOW
+        elif self.can_fit(model.model_size, current_time, info_staleness):
+            return self._CAN_RUN_ON_LOAD
+        else:
+            return self._CANNOT_RUN
+
+    def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tuple[int, int]], dict[tuple[int, int], list[Task]]]:
         """
             Returns a list of all task_types with at least 1 task queued on this
             worker in order of when they are scheduled to execute (e.g. task queue
@@ -85,87 +120,69 @@ def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(i
         )
 
         return task_types_by_arrival, task_queues
-
-
-    def maybe_start_task_any(self, current_time):
-        task_types, task_queues = self.get_sorted_task_types(current_time)
-        
-        if self.num_free_slots > 0:
-            for task_type in task_types:
-                first_queued_task = task_queues[task_type][0]
-                if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp):
-                    did_exec_batch, task_end_events = self.maybe_start_task_for_type(
-                        current_time, task_type, first_queued_task.max_wait_time
-                    )
-                    if did_exec_batch:
-                        return task_end_events
-                    # keep checking queue until batch is executed or tasks run out
-        
-        # if no queued tasks, maybe is never called and no wake up events are
-        # appended; in this case worker sleeps until a new task arrives
-        return []
     
+    def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> list[EventOrders]:
+        # only schedule a wake-up if tasks are queued (avoids congestion);
+        # an empty queue is woken up by the next task enqueue instead
+        if len(task_queue) == 0:
+            return []
 
-    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]:
-        """
-            Execute a batch if there are free slots available and at least 1 task queued.
-
-            Returns did_exec_batch : bool, task_end_events : list[Event]
-        """
+        batch_end_events = []
         latest_time = current_time
-        did_exec_batch = False
-
-        task_end_events = []
-        task_list = self.get_queue_history(current_time, task_type, info_staleness=0)
         
-        queued_tasks = queue.Queue()
-        [queued_tasks.put(task) for task in task_list]
-
-        # form largest batch < max_batch_size possible
-        batch = []
-        while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size:
-            task = queued_tasks.get()
-            if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
-                batch.append(task)
+        can_run = self.can_run_task(current_time, task_queue[0].model)
+        if can_run == self._CAN_RUN_ON_LOAD:
+            current_time += self.evict_models_from_GPU_until(
+                current_time, task_queue[0].model.model_size)
         
-        # full batch or max wait time has passed
-        if len(task_list) > 0 and self.num_free_slots > 0:
-            # print(f"[{current_time}] W{self.worker_id}: Batch of {task_list} executing")
-
-            batch_end_events, task_end_time = self.batch_execute(
-                batch, current_time)
+        if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD:
+            queued_tasks = queue.Queue()
+            [queued_tasks.put(task) for task in task_queue]
+
+            # form largest batch <= max_batch_size possible
+            batch = []
+            while (not queued_tasks.empty()) and len(batch) < task_queue[0].max_batch_size:
+                task = queued_tasks.get()
+                if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
+                    batch.append(task)
             
-            # rm all tasks in batch
-            for task in batch:
-                self.rm_task_in_queue_history(task, current_time)
-
-            latest_time = task_end_time
-
-            did_exec_batch = True
-            task_end_events += batch_end_events
+            if len(batch) > 0:
+                batch_end_events, task_end_time = self.batch_execute(batch, current_time)
+                for task in batch: # rm all tasks in batch
+                    self.rm_task_in_queue_history(task, current_time)
+                latest_time = task_end_time
 
         # track next wake up time so old wake ups can be skipped
-        next_check_time = latest_time + task_wait_time
-        self.next_check_times[task_type] = next_check_time
+        next_check_time = latest_time + task_queue[0].max_wait_time
+        self.next_check_times[task_queue[0].task_type] = next_check_time
 
         # if idle, check again in wait time
         # NOTE: for some reason, appending to task_end_events does not always
         # lead to event being enqueued; thus we enqueue directly to sim queue here
-        self.simulation.event_queue.put(
-            EventOrders(
-                next_check_time,
-                WorkerWakeUpEvent(self, task_type, task_wait_time)
-            )
-        )
-
-        return did_exec_batch, task_end_events
+        self.simulation.event_queue.put(EventOrders(
+            next_check_time,
+            WorkerWakeUpEvent(self, 
+                                task_queue[0].task_type, 
+                                task_queue[0].max_wait_time)))
+        
+        return batch_end_events
+    
+    def maybe_start_task_any(self, current_time):
+        all_end_events = []
+        task_types, task_queues = self.get_sorted_task_types(current_time)
+        for task_type in task_types:
+            all_end_events += self._maybe_start_batch(task_queues[task_type], current_time)
+        return all_end_events
+    
+    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]:
+        task_queue = self.get_queue_history(current_time, task_type, info_staleness=0)
+        return self._maybe_start_batch(task_queue, current_time)
 
-    # modify to handle a batch of tasks:
-    # need to model batch execution duration
-    # transfer to next step should handle a list of tasks
     def batch_execute(self, tasks, current_time):
+        self.current_batch = tasks
+
         self.involved = True
-        self.num_free_slots -= 1
+        self.models_in_use.append(tasks[0].model)
         model_fetch_time = self.fetch_model(tasks[0].model, current_time)
 
         batch_index = 0
@@ -195,7 +212,7 @@ def batch_execute(self, tasks, current_time):
             task.log.task_execution_end_timestamp = task_end_time
 
         task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
-            self, job_ids=job_ids, task_type=tasks[0].task_type
+            self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type
         )))
 
         return task_end_events, task_end_time

From c85596f008e955e4c0829422ab0fb066e0e12e43 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 11 Jun 2025 15:51:41 -0400
Subject: [PATCH 23/41] add model data to track models in use

---
 core/events.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/core/events.py b/core/events.py
index f3480d6..a6469f8 100644
--- a/core/events.py
+++ b/core/events.py
@@ -160,7 +160,7 @@ def __init__(self, worker, job_id=-1, task_id=-1):
         self.task_id = task_id  # integer representing the task_id
 
     def run(self, current_time):
-        return self.worker.free_slot(current_time)
+        return self.worker.free_slot(current_time, self)
 
     def to_string(self):
         return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id)
@@ -169,13 +169,14 @@ def to_string(self):
 class BatchEndEvent(Event):
     """ Event to signify that a BATCH has been performed by the WORKER. """
 
-    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
+    def __init__(self, worker, model, job_ids=[], task_type=(-1, -1)):
         self.worker = worker
+        self.model = model
         self.job_ids = job_ids    # integers representing the job_ids
         self.task_type = task_type # (workflow_id, task_id)
 
     def run(self, current_time):
-        return self.worker.free_slot(current_time)
+        return self.worker.free_slot(current_time, self.model)
 
     def to_string(self):
         jobs = ",".join([str(id) for id in self.job_ids])
@@ -229,10 +230,8 @@ def __init__(self, worker, task_id, task_max_wait_time):
 
     def run(self, current_time):
         if self.will_run(current_time):
-            _, task_end_events = self.worker.maybe_start_task_for_type(
-                current_time, self.task_id, self.task_max_wait_time
-            )
-            return task_end_events
+            return self.worker.maybe_start_task_for_type(
+                current_time, self.task_id, self.task_max_wait_time)
         return []
 
     def to_string(self):

From 3d5d72cb15f94f4b2dccbf9dc383801a866e729b Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 10:57:09 -0400
Subject: [PATCH 24/41] can fit helper fix

---
 workers/taskworker.py |  3 ++-
 workers/worker.py     | 11 ++++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index 02d6df9..ce24381 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -182,7 +182,8 @@ def batch_execute(self, tasks, current_time):
         self.current_batch = tasks
 
         self.involved = True
-        self.models_in_use.append(tasks[0].model)
+        if tasks[0].model != None:
+            self.models_in_use.append(tasks[0].model)
         model_fetch_time = self.fetch_model(tasks[0].model, current_time)
 
         batch_index = 0
diff --git a/workers/worker.py b/workers/worker.py
index 92ed859..a3fcbe8 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -17,7 +17,7 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # Keep track of the list of models sitting in GPU memory at time: 
         # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...]
         self.GPU_memory_models_history = []
-        self.models_in_use = []
+        self.models_in_use = [] # models in use by a currently executing batch
 
     def __hash__(self):
         return hash(self.worker_id)
@@ -71,14 +71,11 @@ def can_fit(self, min_required_memory: int, current_time: float, info_staleness=
         if GPU_MEMORY_SIZE - used_memory >= min_required_memory:
             return True
         
-        # if not executing any batches or executing a batch with no model,
-        # existing models can be evicted to make space
-        if (self.current_batch == [] or self.current_batch[0].model == None) and \
-            min_required_memory <= GPU_MEMORY_SIZE:
+        if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE:
             return True
         
-        # if evicting all except current batch's required model can make enough space
-        if GPU_MEMORY_SIZE - self.current_batch[0].model.model_size >= min_required_memory:
+        # if evicting all except current required models can make enough space
+        if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) >= min_required_memory:
             return True
 
     def fetch_model(self, model, current_time):

From 00dc474f7c7c13d0e1dd587189f08bec4df9d4eb Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 11:16:59 -0400
Subject: [PATCH 25/41] merge

---
 core/simulation.py             |  1 +
 experiments/run_experiments.py | 10 +++++++++-
 workers/taskworker.py          | 19 +++++++++++++++++--
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/core/simulation.py b/core/simulation.py
index 22b3f1b..73df7c5 100644
--- a/core/simulation.py
+++ b/core/simulation.py
@@ -44,6 +44,7 @@ def __init__(
         self.result_to_export = pd.DataFrame()
         self.tasks_logging_times = pd.DataFrame()
         self.event_log = pd.DataFrame(columns=["time", "event"])
+        self.batch_exec_log = pd.DataFrame(columns=["time", "worker_id", "workflow_id", "task_id", "batch_size", "model_exec_time", "batch_exec_time", "job_ids"])
 
         print("---- SIMULATION : " + self.simulation_name + "----")
         self.produce_breakdown =  produce_breakdown
diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py
index a14154e..a14bf70 100644
--- a/experiments/run_experiments.py
+++ b/experiments/run_experiments.py
@@ -12,7 +12,7 @@
 
 # experiment_schedulers options: centralheft | decentralheft | hashtask
 experiment_schedulers = []
-plotting_job_type_list = [0, 1, 2, 3]
+plotting_job_type_list = [0]
 # plotting_job_type_list = [2,3]
 np.random.seed(42)
 
@@ -95,3 +95,11 @@
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str(
             LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv")
+        
+        sim.batch_exec_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "batch_log.csv")
+        
+        worker_model_histories = pd.concat(list(map(lambda w: w.model_history_log, sim.workers)), 
+                                           keys=list(map(lambda w: w.worker_id, sim.workers)), 
+                                           names=['worker_id']).reset_index(level='worker_id')
+        worker_model_histories = worker_model_histories.sort_values(by="start_time")
+        worker_model_histories.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "model_history_log.csv")
diff --git a/workers/taskworker.py b/workers/taskworker.py
index ce24381..3ebeb39 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -20,6 +20,10 @@ def add_task(self, current_time, task):
         """
         Add task into the local task queue
         """
+
+        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
+
+        # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
         self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker
         return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time)
@@ -95,8 +99,7 @@ def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> i
         if model == None: # doesn't use GPU
             return self._CAN_RUN_NOW
         # has >= 1 copies of model in memory that are not currently in use
-        elif self.does_have_model(model, current_time, info_staleness) and \
-             self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
+        elif self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
                 return self._CAN_RUN_NOW
         elif self.can_fit(model.model_size, current_time, info_staleness):
             return self._CAN_RUN_ON_LOAD
@@ -184,6 +187,7 @@ def batch_execute(self, tasks, current_time):
         self.involved = True
         if tasks[0].model != None:
             self.models_in_use.append(tasks[0].model)
+        
         model_fetch_time = self.fetch_model(tasks[0].model, current_time)
 
         batch_index = 0
@@ -212,6 +216,17 @@ def batch_execute(self, tasks, current_time):
             task.log.task_execution_start_timestamp = current_time + model_fetch_time
             task.log.task_execution_end_timestamp = task_end_time
 
+        self.simulation.batch_exec_log.loc[len(self.simulation.batch_exec_log)] = {
+            "time": current_time,
+            "worker_id": self.worker_id,
+            "workflow_id": tasks[0].task_type[0],
+            "task_id": tasks[0].task_id,
+            "batch_size": len(tasks),
+            "model_exec_time": tasks[0].batch_exec_time[batch_index],
+            "batch_exec_time": model_fetch_time + tasks[0].batch_exec_time[batch_index],
+            "job_ids": job_ids
+        }
+
         task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
             self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type
         )))

From 2c916e309bba23559b94f417b0e2b9ba9ebdcd17 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 13:07:40 -0400
Subject: [PATCH 26/41] model logging & eviction fix

---
 workers/worker.py | 71 +++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/workers/worker.py b/workers/worker.py
index a3fcbe8..7260498 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -2,7 +2,8 @@
 from core.config import *
 from core.network import *
 from core.config import *
-import sys
+
+import pandas as pd
 
 
 class Worker(object):
@@ -19,6 +20,9 @@ def __init__(self, simulation, num_free_slots, worker_id):
         self.GPU_memory_models_history = []
         self.models_in_use = [] # models in use by a currently executing batch
 
+        self.model_history_log = pd.DataFrame(columns=["start_time", "end_time",
+                                                       "model_id", "placed_or_evicted"])
+
     def __hash__(self):
         return hash(self.worker_id)
 
@@ -66,17 +70,33 @@ def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int:
         return w_models.count(model)
 
     def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool:
+        # models currently being fetched = models in use - models loaded on GPU
+        loaded_models = self.get_model_history(current_time, info_staleness)
+        fetching_models = []
+        for model in self.models_in_use:
+            if model in loaded_models:
+                loaded_models.remove(model)
+            else:
+                fetching_models.append(model)
+
+        # loaded models + models currently being fetched
+        used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) + \
+                      sum([model.model_size for model in fetching_models])
+        
         # if currently available memory >= min_required_memory
-        used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness)
         if GPU_MEMORY_SIZE - used_memory >= min_required_memory:
             return True
         
+        # if no batches/current batches do not use GPU
         if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE:
             return True
         
-        # if evicting all except current required models can make enough space
-        if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) >= min_required_memory:
+        # if evicting all except current required models & models being fetched can make enough space
+        if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) - \
+            sum(map(lambda m: m.model_size, fetching_models)) >= min_required_memory:
             return True
+        
+        return False
 
     def fetch_model(self, model, current_time):
         """
@@ -92,13 +112,21 @@ def fetch_model(self, model, current_time):
         # case1: if it is in local GPU already
         if self.does_have_model(model, current_time):
             return 0
+        
         fetch_time = 0
         fetch_time = SameMachineCPUtoGPU_delay(model.model_size)
+
+        self.model_history_log.loc[len(self.model_history_log)] = {
+            "start_time": current_time,
+            "end_time": current_time + fetch_time, 
+            "model_id": model.model_id,
+            "placed_or_evicted": "placed"
+        }
+
         self.simulation.metadata_service.add_model_cached_location(
             model, self.worker_id, current_time + fetch_time)
         self.add_model_to_memory_history(model, current_time + fetch_time)
-        eviction_time = self.evict_model_from_GPU(current_time + fetch_time)
-        return fetch_time + eviction_time
+        return fetch_time
     
     # NOTE: REQUIRED OVERRIDE
     def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0):
@@ -116,6 +144,13 @@ def _evict_models_from_GPU(self, models_to_evict, current_time):
                     model, self.worker_id, current_time)
                 self.rm_model_in_memory_history(model, current_time)
                 eviction_duration += SameMachineGPUtoCPU_delay(model.model_size)
+
+                self.model_history_log.loc[len(self.model_history_log)] = {
+                    "start_time": current_time,
+                    "end_time": current_time + eviction_duration, 
+                    "model_id": model.model_id,
+                    "placed_or_evicted": "evicted"
+                }
         return eviction_duration
 
 
@@ -148,30 +183,6 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
                     return self._evict_models_from_GPU(models_to_evict, current_time)
         
         return 0
-    
-
-    def evict_model_from_GPU(self, current_time):
-        """
-        Do nothing if current cached models didn't exceed the GPU memory
-        remove this information to 2 histories:  
-            1. model_history on worker
-            2. cache_history on metadata_service
-        """
-        models_in_GPU = self.get_model_history(current_time, info_staleness=0)
-        models_total_size = 0
-        for model in models_in_GPU:
-            models_total_size += model.model_size
-        eviction_index = 0
-        eviction_duration = 0
-        while (models_total_size > GPU_MEMORY_SIZE):
-            rm_model = models_in_GPU[eviction_index]
-            self.simulation.metadata_service.rm_model_cached_location(
-                rm_model, self.worker_id, current_time)
-            self.rm_model_in_memory_history(rm_model, current_time)
-            models_total_size -= rm_model.model_size
-            eviction_index += 1
-            eviction_duration += SameMachineGPUtoCPU_delay(rm_model.model_size)
-        return eviction_duration
 
     # ------------------------- cached model history update helper functions ---------------
     def add_model_to_memory_history(self, model, current_time):

From b04b800945dae4aaa6e26b6a48f110000354aafb Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 15:36:38 -0400
Subject: [PATCH 27/41] merge: job logging

---
 core/events.py        | 19 +++++++++++++++++++
 workers/taskworker.py |  7 +++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/core/events.py b/core/events.py
index a6469f8..4ededcc 100644
--- a/core/events.py
+++ b/core/events.py
@@ -166,6 +166,25 @@ def to_string(self):
         return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id)
 
 
+class BatchStartEvent(Event):
+    """ 
+        Event to signify that a BATCH has started executing in WORKER.
+        Only for logging purposes.
+    """
+
+    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
+        self.worker = worker
+        self.job_ids = job_ids      # list[int] with the job_ids in the batch
+        self.task_type = task_type  # (workflow_id, task_id) identifying the batch task_type
+
+    def run(self, current_time):
+        return []
+
+    def to_string(self):
+        jobs = ",".join([str(id) for id in self.job_ids])
+        return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
+
+
 class BatchEndEvent(Event):
     """ Event to signify that a BATCH has been performed by the WORKER. """
 
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 3ebeb39..96db516 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -21,7 +21,7 @@ def add_task(self, current_time, task):
         Add task into the local task queue
         """
 
-        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
+        print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
 
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
@@ -182,7 +182,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) ->
         return self._maybe_start_batch(task_queue, current_time)
 
     def batch_execute(self, tasks, current_time):
-        self.current_batch = tasks
+        assert(len(tasks) > 0) # cannot launch empty batch
 
         self.involved = True
         if tasks[0].model != None:
@@ -227,6 +227,9 @@ def batch_execute(self, tasks, current_time):
             "job_ids": job_ids
         }
 
+        task_end_events.append(EventOrders(current_time + model_fetch_time, BatchStartEvent(
+            self, job_ids=job_ids, task_type=tasks[0].task_type
+        )))
         task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
             self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type
         )))

From 086256e6031f446fa2705ecb1b12999ebe2db95d Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Fri, 6 Jun 2025 15:36:38 -0400
Subject: [PATCH 28/41] merge fix: add BatchEndEvent class to core/events.py

---
 core/events.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/core/events.py b/core/events.py
index 4ededcc..3bb6202 100644
--- a/core/events.py
+++ b/core/events.py
@@ -185,6 +185,22 @@ def to_string(self):
         return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
 
 
+class BatchEndEvent(Event):
+    """ Event to signify that a BATCH has been performed by the WORKER. """
+
+    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
+        self.worker = worker
+        self.job_ids = job_ids      # list[int] with the job_ids in the batch
+        self.task_type = task_type  # (workflow_id, task_id) identifying the batch task_type
+
+    def run(self, current_time):
+        return []
+
+    def to_string(self):
+        jobs = ",".join([str(id) for id in self.job_ids])
+        return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
+
+
 class BatchEndEvent(Event):
     """ Event to signify that a BATCH has been performed by the WORKER. """
 

From 11a9557fe5d318a0e575186086c80a8c7daac7b4 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 20 May 2025 11:13:34 -0400
Subject: [PATCH 29/41] merge: job logging

---
 core/simulation.py             | 13 +++++++------
 experiments/parse_results.py   |  2 +-
 experiments/run_experiments.py |  7 +++++++
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/core/simulation.py b/core/simulation.py
index 73df7c5..d9b538d 100644
--- a/core/simulation.py
+++ b/core/simulation.py
@@ -106,8 +106,8 @@ def run_finish(self, last_time, by_job_type=False):
     def produce_time_breakdown_results(self, completed_jobs):
 
         dataframe = pd.DataFrame(columns=["job_id", "load_info_staleness", "placement_info_staleness", "req_inter_arrival_delay",
-                                          "workflow_type", "scheduler_type", "slowdown", "response_time"])
-        dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "time_to_buffer", "dependency_wait_time",
+                                          "workflow_type", "job_create_time", "scheduler_type", "slowdown", "response_time"])
+        dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "task_arrival_time", "task_start_exec_time", "time_to_buffer", "dependency_wait_time",
                                                     "time_spent_in_queue", "model_fetching_time", "execution_time"])
 
         for index, completed_job in enumerate(completed_jobs):
@@ -122,7 +122,7 @@ def produce_time_breakdown_results(self, completed_jobs):
             if "JOB_CREATION_INTERVAL" in WORKFLOW_LIST[completed_job.job_type_id]:
                 job_creation_interval = WORKFLOW_LIST[completed_job.job_type_id]["JOB_CREATION_INTERVAL"]
             dataframe.loc[index] = [index, LOAD_INFORMATION_STALENESS, PLACEMENT_INFORMATION_STALENESS, job_creation_interval, completed_job.job_type_id,
-                                    self.simulation_name, slowdown, response_time]
+                                    completed_job.create_time, self.simulation_name, slowdown, response_time]
 
         task_index = 0
         for job in completed_jobs:
@@ -143,9 +143,10 @@ def produce_time_breakdown_results(self, completed_jobs):
                 assert model_fetching_time >= 0
                 assert execution_time >= 0
 
-                dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, time_to_buffer,
-                                                       dependency_wait_time, time_spent_in_queue, model_fetching_time, execution_time]
+                dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, task.log.task_arrival_at_worker_buffer_timestamp, 
+                                                       task.log.task_execution_start_timestamp,time_to_buffer, dependency_wait_time, 
+                                                       time_spent_in_queue, model_fetching_time, execution_time]
                 task_index += 1
 
         self.tasks_logging_times = dataframe_tasks_log
-        self.result_to_export = dataframe
\ No newline at end of file
+        self.result_to_export = dataframe
diff --git a/experiments/parse_results.py b/experiments/parse_results.py
index ee2ecfe..3c37dc8 100644
--- a/experiments/parse_results.py
+++ b/experiments/parse_results.py
@@ -155,4 +155,4 @@ def gen_stats(job_df, event_df):
 
 plot_batch_size_bar_chart(events_df, out_path)
 plot_batch_size_vs_batch_start(events_df, out_path)
-plot_response_time_vs_arrival_time(job_df, out_path)
\ No newline at end of file
+plot_response_time_vs_arrival_time(job_df, out_path)
diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py
index a14bf70..26c1c5d 100644
--- a/experiments/run_experiments.py
+++ b/experiments/run_experiments.py
@@ -56,10 +56,14 @@
         event_log.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "events_by_time.csv")
 
         # result_to_export = sim.result_to_export
+        result_to_export = sim.result_to_export
+        result_to_export.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "job_breakdown.csv")
+
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "loadDelay_" + str(
             LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv")
 
+   
     if "hashtask" in experiment_schedulers:
         OUTPUT_FILENAME = "hashtask"
         sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK",
@@ -92,6 +96,9 @@
         event_log = sim.event_log
         event_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "events_by_time.csv")
         
+        result_to_export = sim.result_to_export
+        result_to_export.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "job_breakdown.csv")
+
         tasks_logging_times = sim.tasks_logging_times
         tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str(
             LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv")

From e5b5516ae6e3c91867ab272c89a477204dfdaa14 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 13:35:01 -0400
Subject: [PATCH 30/41] plot model loading

---
 experiments/parse_results.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/experiments/parse_results.py b/experiments/parse_results.py
index 3c37dc8..dbd484f 100644
--- a/experiments/parse_results.py
+++ b/experiments/parse_results.py
@@ -144,6 +144,18 @@ def gen_stats(job_df, event_df):
     # print(f"TPUT: {len(job_df) / event_df.loc[len(events_df)-1]["time"]}")
     
 
+def plot_model_loading_histogram(model_df, out_path):
+    fig = plt.figure(figsize=(8, 6))
+
+    plt.hist(model_df["start_time"], bins=15, edgecolor='black')
+
+    plt.xlabel("Time")
+    plt.ylabel("Number of models loaded")
+    plt.title(f"Model Loading Over Time")
+
+    plt.savefig(os.path.join(out_path, f"model_loading_hist.png"))
+
+
 results_dir_path = sys.argv[1] # results/<scheduler_type>
 out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results"
 
@@ -152,7 +164,9 @@ def gen_stats(job_df, event_df):
 job_df = pd.read_csv(os.path.join(results_dir_path, "job_breakdown.csv"))
 # task_df = pd.read_csv(os.path.join(results_dir_path, "loadDelay_1_placementDelay_1.csv"))
 events_df = pd.read_csv(os.path.join(results_dir_path, 'events_by_time.csv'))
+model_df = pd.read_csv(os.path.join(results_dir_path, "model_history_log.csv"))
 
+plot_model_loading_histogram(model_df, out_path)
 plot_batch_size_bar_chart(events_df, out_path)
 plot_batch_size_vs_batch_start(events_df, out_path)
 plot_response_time_vs_arrival_time(job_df, out_path)

From 8547b8cb874c3ef9f4fd24b03208f53e0401f42c Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 14:27:04 -0400
Subject: [PATCH 31/41] fetch fix and policy choice

---
 workers/worker.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/workers/worker.py b/workers/worker.py
index 7260498..6182f08 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -106,11 +106,9 @@ def fetch_model(self, model, current_time):
             1. model_history on worker
             2. cache_history on metadata_service
         """
-        if model is None:
-            return 0
-        # First check if the model is stored locally: either on GPU, or systemRAM(home node)
-        # case1: if it is in local GPU already
-        if self.does_have_model(model, current_time):
+        # check if exists a copy of the model not currently in use
+        if model is None or \
+            self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
             return 0
         
         fetch_time = 0
@@ -153,10 +151,12 @@ def _evict_models_from_GPU(self, models_to_evict, current_time):
                 }
         return eviction_duration
 
+    LOOKAHEAD_EVICTION = 0
+    FCFS_EVICTION = 1
 
-    def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int) -> float:
+    def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int, policy: int) -> float:
         """
-            Evicts models from GPU according to lookahead eviction policy until at least
+            Evicts models from GPU according to FCFS or lookahead eviction policy until at least
             min_required_memory space is available. Returns time taken to execute model
             evictions. 0 if min_required_memory could not be created.
             Assumes batches run in first task arrival order.
@@ -167,15 +167,16 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
         curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time)
        
         models_in_GPU = self.get_model_history(current_time, info_staleness=0)
-        next_models = self.get_next_models(3, current_time)
-        models_in_GPU_sorted = sorted(
-            models_in_GPU, 
-            key=lambda m: next_models.index(m) if m in next_models else len(next_models),
-            reverse=True
-        )
+        if policy == self.LOOKAHEAD_EVICTION:
+            next_models = self.get_next_models(3, current_time)
+            models_in_GPU = sorted(
+                models_in_GPU, 
+                key=lambda m: next_models.index(m) if m in next_models else len(next_models),
+                reverse=True
+            )
 
         models_to_evict = []
-        for model in models_in_GPU_sorted:
+        for model in models_in_GPU:
             if model not in self.models_in_use:
                 curr_memory -= model.model_size
                 models_to_evict.append(model)

From 50fe4765205b8a12b4e52f05d3bc4d962ce48120 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 14:27:25 -0400
Subject: [PATCH 32/41] policy choice

---
 workers/taskworker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index 96db516..6ef5d4e 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -21,7 +21,7 @@ def add_task(self, current_time, task):
         Add task into the local task queue
         """
 
-        print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
+        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
 
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
@@ -136,7 +136,7 @@ def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> lis
         can_run = self.can_run_task(current_time, task_queue[0].model)
         if can_run == self._CAN_RUN_ON_LOAD:
             current_time += self.evict_models_from_GPU_until(
-                current_time, task_queue[0].model.model_size)
+                current_time, task_queue[0].model.model_size, self.LOOKAHEAD_EVICTION)
         
         if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD:
             queued_tasks = queue.Queue()

From adf3c7016b36cf72353dac5f488cebe182a99c39 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Thu, 12 Jun 2025 15:23:00 -0400
Subject: [PATCH 33/41] fixed loading plot; added eviction plot

---
 experiments/parse_results.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/experiments/parse_results.py b/experiments/parse_results.py
index dbd484f..e2923a3 100644
--- a/experiments/parse_results.py
+++ b/experiments/parse_results.py
@@ -147,7 +147,7 @@ def gen_stats(job_df, event_df):
 def plot_model_loading_histogram(model_df, out_path):
     fig = plt.figure(figsize=(8, 6))
 
-    plt.hist(model_df["start_time"], bins=15, edgecolor='black')
+    plt.hist(model_df[model_df["placed_or_evicted"] == "placed"]["start_time"], bins=15, edgecolor='black')
 
     plt.xlabel("Time")
     plt.ylabel("Number of models loaded")
@@ -156,6 +156,18 @@ def plot_model_loading_histogram(model_df, out_path):
     plt.savefig(os.path.join(out_path, f"model_loading_hist.png"))
 
 
+def plot_model_eviction_histogram(model_df, out_path):
+    fig = plt.figure(figsize=(8, 6))
+
+    plt.hist(model_df[model_df["placed_or_evicted"] == "evicted"]["start_time"], bins=15, edgecolor='black')
+
+    plt.xlabel("Time")
+    plt.ylabel("Number of models evicted")
+    plt.title(f"Model Eviction Over Time")
+
+    plt.savefig(os.path.join(out_path, f"model_eviction_hist.png"))
+
+
 results_dir_path = sys.argv[1] # results/<scheduler_type>
 out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results"
 
@@ -167,6 +179,7 @@ def plot_model_loading_histogram(model_df, out_path):
 model_df = pd.read_csv(os.path.join(results_dir_path, "model_history_log.csv"))
 
 plot_model_loading_histogram(model_df, out_path)
+plot_model_eviction_histogram(model_df, out_path)
 plot_batch_size_bar_chart(events_df, out_path)
 plot_batch_size_vs_batch_start(events_df, out_path)
 plot_response_time_vs_arrival_time(job_df, out_path)

From 70c048991da31c68d483cfac33db2e58276766f8 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 10:47:52 -0400
Subject: [PATCH 34/41] gpu state refactor for accurate allocation handling

---
 workers/model_state.py | 192 +++++++++++++++++++++++++++++++++++++++++
 workers/taskworker.py  | 141 ++++++++++++++++--------------
 workers/worker.py      | 192 ++++++++---------------------------------
 3 files changed, 304 insertions(+), 221 deletions(-)
 create mode 100644 workers/model_state.py

diff --git a/workers/model_state.py b/workers/model_state.py
new file mode 100644
index 0000000..d6acefe
--- /dev/null
+++ b/workers/model_state.py
@@ -0,0 +1,192 @@
+import copy
+
+from core.config import *
+from core.model import Model
+
+
+class ModelState:
+    PLACED = 0
+    IN_FETCH = 1
+    IN_EVICT = 2
+
+    def __init__(self, model: Model, state: int, is_reserved_for_batch=True):
+        assert(state in [self.PLACED, self.IN_FETCH, self.IN_EVICT])
+        self.model = model
+        self.state = state
+        self.is_reserved_for_batch = is_reserved_for_batch
+
+    def __eq__(self, value):
+        return type(value) == ModelState and self.model == value.model and self.state == value.state
+    
+    def __str__(self):
+        return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id}>"
+    
+    def __repr__(self):
+        return self.__str__()
+    
+    def _state_to_str(self):
+        if self.state == self.PLACED: return "Placed"
+        elif self.state == self.IN_FETCH: return "Fetching"
+        elif self.state == self.IN_EVICT: return "Evicting"
+
+
+class GPUState(object):
+    def __init__(self):
+        # sorted (asc) list of GPU states [(time, [model states])]
+        self._model_states = []
+
+    def reserved_memory(self, time: float) -> float:
+        """
+            Returns the total GPU memory that is currently in use, either
+            for currently placed models, models that are being fetched, or
+            models that are being evicted.
+        """
+        return sum(state.model.model_size for state in self.state_at(time))
+    
+    def available_memory(self, time: float) -> float:
+        """
+            Returns total GPU memory not occupied by a model, reserved for
+            a model being fetched, or used by a model being evicted.
+        """
+        return GPU_MEMORY_SIZE - self.reserved_memory(time)
+
+    def can_fetch_model(self, model: Model, time: float) -> bool:
+        """
+            Returns True if a new copy of [model] can be fetched to the
+            GPU as is with no evictions.
+        """
+        return self.available_memory(time) >= model.model_size
+    
+    def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool:
+        """
+            Return True if a new copy of [model] can be fetched to the GPU
+            upon evicting some number of placed models not in use.
+        """
+        # cannot use space occupied by models currently being fetched/evicted or used
+        return (self.available_memory(time) + \
+                sum(state.model.model_size for state in self.state_at(time) 
+                    if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size
+    
+    def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify):
+        """
+            Internal helper to update states at exactly [marker_time] with 
+            [at_marker_modify: (time, old_states) -> new_states] and states
+            after [marker_time] with 
+            [post_marker_modify: (time, old_states) -> new_states].
+        """
+        did_add_marker = False
+        for i in range(len(self._model_states)-1, -1, -1):
+            (timestamp, states) = self._model_states[i]
+            if timestamp == marker_time:
+                at_marker_modify(timestamp, states)
+                did_add_marker = True
+            elif timestamp < marker_time:
+                if not did_add_marker:
+                    state_copy = copy.deepcopy(states)
+                    at_marker_modify(timestamp, state_copy)
+                    self._model_states.insert(i+1, (marker_time, state_copy))
+                    did_add_marker = True
+                return
+            else:
+                post_marker_modify(timestamp, states)
+
+        if not did_add_marker:
+            states = []
+            at_marker_modify(marker_time, states)
+            self._model_states.insert(0, (marker_time, states))
+
+    def fetch_model(self, model: Model, start_time: float, fetch_time: float):
+        """
+            Fetches a new copy of [model] to the GPU if there is enough available
+            memory without additional evictions.
+        """
+        assert(model != None)
+        assert(self.can_fetch_model(model, start_time))
+
+        fetch_end_time = start_time + fetch_time
+
+        if len(self._model_states) == 0:
+            # mark when fetch begins and ends
+            self._model_states.append((start_time, [ModelState(model, ModelState.IN_FETCH)]))
+            self._model_states.append((fetch_end_time, [ModelState(model, ModelState.PLACED)]))
+            return
+        
+        # add fetch end marker
+        self._insert_state_marker(fetch_end_time, 
+                                  lambda _, states: states.append(ModelState(model, ModelState.PLACED)),
+                                  lambda _, states: states.append(ModelState(model, ModelState.PLACED)))
+        
+        # add fetch start marker
+        self._insert_state_marker(start_time, 
+                                  lambda _, states: states.append(ModelState(model, ModelState.IN_FETCH)),
+                                  lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None)
+
+    
+    def evict_model(self, model: Model, start_time: float, evict_time: float):
+        assert(model in self.placed_models(start_time))
+
+        eviction_end_time = start_time + evict_time
+
+        # remove model from all later timestamps
+        def _remove_model(timestamp, states):
+            for state in states:
+                if state.state == ModelState.PLACED and state.model == model:
+                    states.remove(state)
+                    return
+            assert(False)
+
+        self._insert_state_marker(eviction_end_time, _remove_model, _remove_model)
+
+        def _begin_model_eviction(timestamp, states):
+            for state in states:
+                if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch:
+                    state.state = ModelState.IN_EVICT
+                    return
+            assert(False) # should not happen: no model exists to evict
+
+        # add eviction start marker
+        self._insert_state_marker(start_time, _begin_model_eviction, 
+                                  lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None)
+
+
+    def state_at(self, time: float) -> list[ModelState]:
+        for (timestamp, states) in self._model_states[::-1]:
+            if timestamp <= time:
+                return states
+        return []
+
+    def placed_models(self, time: float) -> list[Model]:
+        return [state.model for state in self.state_at(time) if state.state == ModelState.PLACED]
+    
+    def placed_model_states(self, time: float) -> list[ModelState]:
+        states = self.state_at(time)
+        if len(states) == 0: 
+            return []
+        return [state for state in states if state.state == ModelState.PLACED]
+    
+    def does_have_idle_copy(self, model: Model, time: float) -> bool:
+        return any(state.model == model and not state.is_reserved_for_batch for state in self.placed_model_states(time))
+    
+    def reserve_idle_copy(self, model: Model, time: float):
+        assert(self.does_have_idle_copy(model, time))
+
+        def _occupy_one_copy(timestamp, states):
+            for j, state in enumerate(states):
+                if state.model == model and \
+                    state.state == ModelState.PLACED and \
+                    not state.is_reserved_for_batch:
+                    states[j].is_reserved_for_batch = True
+                    return
+            assert(False) # should not reach! (no idle copies)
+
+        # reserve 1 idle copy from start to exec end
+        self._insert_state_marker(time, _occupy_one_copy, _occupy_one_copy)
+
+    def release_busy_copy(self, model: Model, time: float):
+        def _release_one_copy(timestamp, states):
+            for i, state in enumerate(states):
+                if state.model == model and state.state == ModelState.PLACED and state.is_reserved_for_batch:
+                    states[i].is_reserved_for_batch = False
+                    return
+
+        self._insert_state_marker(time, _release_one_copy, _release_one_copy)
\ No newline at end of file
diff --git a/workers/taskworker.py b/workers/taskworker.py
index 6ef5d4e..24a02a6 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -14,19 +14,21 @@ def __init__(self, simulation, num_free_slots, worker_id):
         # keep track of the queue information at time:  [ (time1,[task0,task1,]), (time2,[task1,...]),...]
         self.queue_history = {}
         self.involved = False
-        self.next_check_times = {}
+        self.max_wait_times = {}
 
     def add_task(self, current_time, task):
         """
         Add task into the local task queue
         """
-
-        # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived")
-
         # Update when the task is sent to the worker
         assert (task.log.task_placed_on_worker_queue_timestamp <= current_time)
         self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker
-        return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time)
+
+        # Initialize max wait time
+        if task.task_type not in self.max_wait_times or self.max_wait_times[task.task_type] < 0:
+            self.max_wait_times[task.task_type] = current_time + task.max_wait_time
+        
+        return self.maybe_start_batch(current_time, task.task_type)
     
     def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0):
         if lookahead_count <= 0:
@@ -43,13 +45,17 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen
 
         return next_models
 
-    def free_slot(self, current_time, model):
-        """ Frees a slot on the worker and attempts to launch another task in that slot. """
-        # self.num_free_slots += 1
+    def free_slot(self, current_time, model, task_type):
+        """ Attempts to launch another task. """
         if model != None:
-            self.models_in_use.remove(model)
+            self.GPU_state.release_busy_copy(model, current_time)
+
+        get_task_events = []
+        task_types, task_queues = self.get_sorted_task_types(current_time)
+        for task_type in task_types:
+            batch_end_events = self._maybe_start_batch(task_queues[task_type], current_time)
+            get_task_events += batch_end_events
         
-        get_task_events = self.maybe_start_task_any(current_time)
         return get_task_events
 
     #  --------------------------- DECENTRALIZED WORKER SCHEDULING  ----------------------
@@ -86,25 +92,26 @@ def schedule_job_heft(self, current_time, job):
     #  ---------------------------  TASK EXECUTION  ----------------------
 
     _CAN_RUN_NOW = 0
-    _CAN_RUN_ON_LOAD = 1
+    _CAN_RUN_ON_EVICT = 1
     _CANNOT_RUN = 2
 
     def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> int:
         """
             Returns _CAN_RUN_NOW if model None, or model is on GPU and not currently in use.
-            Returns _CAN_RUN_ON_LOAD if model can be loaded onto the GPU (either by evicting 
-            existing models not in use or simply using available memory).
+            Returns _CAN_RUN_ON_EVICT if model can be loaded onto the GPU upon evicting
+            unused models.
             Returns _CANNOT_RUN otherwise.
         """
-        if model == None: # doesn't use GPU
+        if model == None or self.GPU_state.does_have_idle_copy(model, current_time):
+            return self._CAN_RUN_NOW
+        
+        if self.GPU_state.can_fetch_model(model, current_time):
             return self._CAN_RUN_NOW
-        # has >= 1 copies of model in memory that are not currently in use
-        elif self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
-                return self._CAN_RUN_NOW
-        elif self.can_fit(model.model_size, current_time, info_staleness):
-            return self._CAN_RUN_ON_LOAD
-        else:
-            return self._CANNOT_RUN
+        
+        if self.GPU_state.can_fetch_model_on_eviction(model, current_time):
+            return self._CAN_RUN_ON_EVICT
+        
+        return self._CANNOT_RUN
 
     def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tuple[int, int]], dict[tuple[int, int], list[Task]]]:
         """
@@ -113,37 +120,44 @@ def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tu
             at index 0 is the next to be executed when a slot opens up on the worker)
             in addition to a map of all task_types to their task queues.
         """
-
         task_types = self.queue_history.keys()
         task_queues = { task_type: self.get_queue_history(current_time, task_type, info_staleness) for task_type in task_types }
-        
-        task_types_by_arrival = sorted(
-            filter(lambda task_type: len(task_queues[task_type]) > 0, task_types),
-            key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp,
-        )
 
-        return task_types_by_arrival, task_queues
+        types_to_preempt = sorted(filter(
+            lambda task_type: len(task_queues[task_type]) > 0 and \
+                self.max_wait_times[task_type] >= 0 and self.max_wait_times[task_type] <= current_time, task_types),
+            key=lambda task_type: self.max_wait_times[task_type])
+        types_by_arrival = sorted(filter(lambda task_type: len(task_queues[task_type]) > 0 and \
+                                         self.max_wait_times[task_type] > current_time, task_types),
+            key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp)
+       
+        return (types_to_preempt + types_by_arrival), task_queues
     
     def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> list[EventOrders]:
+        """
+            Attempts to start a batch drawn from [task_queue]. If there is not
+            enough GPU memory or the [task_queue] is empty, does nothing. If a
+            batch is started, updates task type's next wake up to max_wait_time +
+            earliest remaining task's arrival.
+        """
         # only wake up if existing tasks to avoid congestion since
         # empty queue will wake up on next task enqueue
         if len(task_queue) == 0:
             return []
 
+        batch = []
         batch_end_events = []
-        latest_time = current_time
-        
+
         can_run = self.can_run_task(current_time, task_queue[0].model)
-        if can_run == self._CAN_RUN_ON_LOAD:
+        if can_run == self._CAN_RUN_ON_EVICT:
             current_time += self.evict_models_from_GPU_until(
                 current_time, task_queue[0].model.model_size, self.LOOKAHEAD_EVICTION)
         
-        if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD:
+        if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_EVICT:
             queued_tasks = queue.Queue()
             [queued_tasks.put(task) for task in task_queue]
 
             # form largest batch < max_batch_size possible
-            batch = []
             while (not queued_tasks.empty()) and len(batch) < task_queue[0].max_batch_size:
                 task = queued_tasks.get()
                 if (current_time >= task.log.task_placed_on_worker_queue_timestamp):
@@ -153,49 +167,52 @@ def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> lis
                 batch_end_events, task_end_time = self.batch_execute(batch, current_time)
                 for task in batch: # rm all tasks in batch
                     self.rm_task_in_queue_history(task, current_time)
-                latest_time = task_end_time
-
-        # track next wake up time so old wake ups can be skipped
-        next_check_time = latest_time + task_queue[0].max_wait_time
-        self.next_check_times[task_queue[0].task_type] = next_check_time
-
-        # if idle, check again in wait time
-        # NOTE: for some reason, appending to task_end_events does not always
-        # lead to event being enqueued; thus we enqueue directly to sim queue here
-        self.simulation.event_queue.put(EventOrders(
-            next_check_time,
-            WorkerWakeUpEvent(self, 
-                                task_queue[0].task_type, 
-                                task_queue[0].max_wait_time)))
-        
+
+                # if successfully launched batch, reset max wait time
+                if not queued_tasks.empty():
+                    earliest_remaining_arrival = -1
+                    while not queued_tasks.empty():
+                        task = queued_tasks.get()
+                        if earliest_remaining_arrival < 0 or \
+                            task.log.task_placed_on_worker_queue_timestamp < earliest_remaining_arrival:
+                            earliest_remaining_arrival = task.log.task_placed_on_worker_queue_timestamp
+                    self.max_wait_times[batch[0].task_type] = earliest_remaining_arrival + batch[0].max_wait_time
+                else:
+                    self.max_wait_times[batch[0].task_type] = -1
+
         return batch_end_events
     
-    def maybe_start_task_any(self, current_time):
-        all_end_events = []
-        task_types, task_queues = self.get_sorted_task_types(current_time)
-        for task_type in task_types:
-            all_end_events += self._maybe_start_batch(task_queues[task_type], current_time)
-        return all_end_events
-    
-    def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]:
+    def maybe_start_batch(self, current_time: float, task_type: tuple[int, int]):
+        """
+            Attempts to launch a batch of [task_type]. Does nothing if there are no
+            tasks of [task_type] queued.
+        """
         task_queue = self.get_queue_history(current_time, task_type, info_staleness=0)
         return self._maybe_start_batch(task_queue, current_time)
 
     def batch_execute(self, tasks, current_time):
+        """
+            Fetches a new copy or reserves an idle copy of any required GPU models
+            and executes the batch [tasks]. Returns a tuple of (list containing
+            the BatchEndEvent, batch execution end time).
+        """
         assert(len(tasks) > 0) # cannot launch empty batch
 
         self.involved = True
-        if tasks[0].model != None:
-            self.models_in_use.append(tasks[0].model)
         
-        model_fetch_time = self.fetch_model(tasks[0].model, current_time)
-
         batch_index = 0
         for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)):
             if len(tasks) <= batch_size: # choose smallest batch size > len(tasks)
                 batch_index = i
                 break
 
+        model_fetch_time = 0
+        if tasks[0].model != None:
+            if self.GPU_state.does_have_idle_copy(tasks[0].model, current_time):
+                self.GPU_state.reserve_idle_copy(tasks[0].model, current_time)
+            else:
+                model_fetch_time = self.fetch_model(tasks[0].model, current_time)
+
         task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index]
         task_end_events = []
 
@@ -227,13 +244,9 @@ def batch_execute(self, tasks, current_time):
             "job_ids": job_ids
         }
 
-        task_end_events.append(EventOrders(current_time + model_fetch_time, BatchStartEvent(
-            self, job_ids=job_ids, task_type=tasks[0].task_type
-        )))
         task_end_events.append(EventOrders(task_end_time, BatchEndEvent(
             self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type
         )))
-
         return task_end_events, task_end_time
 
     #  ---------------------------  Subsequent TASK Transfer   --------------------
diff --git a/workers/worker.py b/workers/worker.py
index 6182f08..685b183 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -5,6 +5,8 @@
 
 import pandas as pd
 
+from workers.model_state import *
+
 
 class Worker(object):
     """ Abstract class representing workers. """
@@ -15,14 +17,12 @@ def __init__(self, simulation, num_free_slots, worker_id):
         self.num_free_slots = num_free_slots
         self.current_batch = [] # track the currently executing batch (if any)
         self.GPU_memory_models = []
-        # Keep track of the list of models sitting in GPU memory at time: 
-        # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...]
-        self.GPU_memory_models_history = []
-        self.models_in_use = [] # models in use by a currently executing batch
+        self.GPU_state = GPUState()
 
         self.model_history_log = pd.DataFrame(columns=["start_time", "end_time",
                                                        "model_id", "placed_or_evicted"])
 
+
     def __hash__(self):
         return hash(self.worker_id)
 
@@ -51,69 +51,18 @@ def initial_model_placement(self, model):
             return 1
         return 1
 
-    def used_GPUmemory(self, current_time, info_staleness=0, requiring_worker_id=None) -> int:
-        """
-        Helper function for local GPU memory usage check
-        """
-        if requiring_worker_id == self.worker_id:
-            info_staleness = 0
-        models = self.get_model_history(current_time, info_staleness)
-        return sum(m.model_size for m in models)
-
     #  ----------  LOCAL MEMORY MANAGEMENT AND RETRIEVE  ----------"""
-    def does_have_model(self, model, current_time: float, info_staleness=0) -> bool:
-        w_models = self.get_model_history(current_time, info_staleness)
-        return model in w_models
-    
-    def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int:
-        w_models = self.get_model_history(current_time, info_staleness)
-        return w_models.count(model)
-
-    def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool:
-        # models currently being fetched = models in use - models loaded on GPU
-        loaded_models = self.get_model_history(current_time, info_staleness)
-        fetching_models = []
-        for model in self.models_in_use:
-            if model in loaded_models:
-                loaded_models.remove(model)
-            else:
-                fetching_models.append(model)
-
-        # loaded models + models currently being fetched
-        used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) + \
-                      sum([model.model_size for model in fetching_models])
-        
-        # if currently available memory >= min_required_memory
-        if GPU_MEMORY_SIZE - used_memory >= min_required_memory:
-            return True
-        
-        # if no batches/current batches do not use GPU
-        if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE:
-            return True
-        
-        # if evicting all except current required models & models being fetched can make enough space
-        if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) - \
-            sum(map(lambda m: m.model_size, fetching_models)) >= min_required_memory:
-            return True
-        
-        return False
-
     def fetch_model(self, model, current_time):
-        """
-        Return: model transfer time required to execute the Task
-        Every "task" requires one "model" to be executed correctly
-        add this information to 2 histories:  
-            1. model_history on worker
-            2. cache_history on metadata_service
-        """
-        # check if exists a copy of the model not currently in use
-        if model is None or \
-            self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0:
+        if model == None or self.GPU_state.does_have_idle_copy(model, current_time):
             return 0
         
         fetch_time = 0
         fetch_time = SameMachineCPUtoGPU_delay(model.model_size)
 
+        self.simulation.metadata_service.add_model_cached_location(
+            model, self.worker_id, current_time + fetch_time)
+        self.GPU_state.fetch_model(model, current_time, fetch_time)
+        
         self.model_history_log.loc[len(self.model_history_log)] = {
             "start_time": current_time,
             "end_time": current_time + fetch_time, 
@@ -121,9 +70,6 @@ def fetch_model(self, model, current_time):
             "placed_or_evicted": "placed"
         }
 
-        self.simulation.metadata_service.add_model_cached_location(
-            model, self.worker_id, current_time + fetch_time)
-        self.add_model_to_memory_history(model, current_time + fetch_time)
         return fetch_time
     
     # NOTE: REQUIRED OVERRIDE
@@ -135,20 +81,22 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen
         return []
 
     def _evict_models_from_GPU(self, models_to_evict, current_time):
+        # NOTE: Assumes any number of models can be evicted concurrently!
         eviction_duration = 0
         for model in models_to_evict:
-            if model not in self.models_in_use:
-                self.simulation.metadata_service.rm_model_cached_location(
-                    model, self.worker_id, current_time)
-                self.rm_model_in_memory_history(model, current_time)
-                eviction_duration += SameMachineGPUtoCPU_delay(model.model_size)
-
-                self.model_history_log.loc[len(self.model_history_log)] = {
-                    "start_time": current_time,
-                    "end_time": current_time + eviction_duration, 
-                    "model_id": model.model_id,
-                    "placed_or_evicted": "evicted"
-                }
+            self.simulation.metadata_service.rm_model_cached_location(
+                model, self.worker_id, current_time)
+            
+            evict_time = SameMachineGPUtoCPU_delay(model.model_size)
+            self.GPU_state.evict_model(model, current_time, evict_time)
+            eviction_duration = max(evict_time, eviction_duration)
+
+            self.model_history_log.loc[len(self.model_history_log)] = {
+                "start_time": current_time,
+                "end_time": current_time + eviction_duration, 
+                "model_id": model.model_id,
+                "placed_or_evicted": "evicted"
+            }
         return eviction_duration
 
     LOOKAHEAD_EVICTION = 0
@@ -159,93 +107,30 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
             Evicts models from GPU according to FCFS or lookahead eviction policy until at least
             min_required_memory space is available. Returns time taken to execute model
             evictions. 0 if min_required_memory could not be created.
-            Assumes batches run in first task arrival order.
+            Assumes batches run in earliest task arrival order.
         """
-        if not self.can_fit(min_required_memory, current_time):
-            return 0
-        
-        curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time)
+        curr_memory = self.GPU_state.available_memory(current_time)
        
-        models_in_GPU = self.get_model_history(current_time, info_staleness=0)
+        placed_model_states = self.GPU_state.placed_model_states(current_time)
         if policy == self.LOOKAHEAD_EVICTION:
             next_models = self.get_next_models(3, current_time)
-            models_in_GPU = sorted(
-                models_in_GPU, 
-                key=lambda m: next_models.index(m) if m in next_models else len(next_models),
+            placed_model_states = sorted(
+                placed_model_states, 
+                key=lambda m: next_models.index(m.model) if m.model in next_models else len(next_models),
                 reverse=True
             )
 
         models_to_evict = []
-        for model in models_in_GPU:
-            if model not in self.models_in_use:
-                curr_memory -= model.model_size
-                models_to_evict.append(model)
+        for state in placed_model_states:
+            if not state.is_reserved_for_batch:
+                curr_memory += state.model.model_size
+                models_to_evict.append(state.model)
                 if curr_memory >= min_required_memory:
                     return self._evict_models_from_GPU(models_to_evict, current_time)
-        
+
         return 0
 
     # ------------------------- cached model history update helper functions ---------------
-    def add_model_to_memory_history(self, model, current_time):
-        assert (model.model_size <= GPU_MEMORY_SIZE)
-        last_index = len(self.GPU_memory_models_history) - 1
-        # 0. base case
-        if last_index == -1:
-            self.GPU_memory_models_history.append((current_time, [model]))
-            return
-        # 1. Find the time_stamp place to add this queue information
-        while last_index >= 0:
-            if self.GPU_memory_models_history[last_index][0] == current_time:
-                if model not in self.GPU_memory_models_history[last_index][1]:
-                    self.GPU_memory_models_history[last_index][1].append(model)
-                break
-            if self.GPU_memory_models_history[last_index][0] < current_time:
-                if model not in self.GPU_memory_models_history[last_index][1]:
-                    next_queue = self.GPU_memory_models_history[last_index][1].copy(
-                    )
-                    next_queue.append(model)
-                    last_index += 1
-                    self.GPU_memory_models_history.insert(
-                        last_index, (current_time, next_queue)
-                    )
-                break
-            # check the previous entry
-            last_index -= 1
-        # 2. added the worker_id to all the subsequent timestamp tuples
-        while last_index < len(self.GPU_memory_models_history):
-            if model not in self.GPU_memory_models_history[last_index][1]:
-                self.GPU_memory_models_history[last_index][1].append(model)
-            last_index += 1
-
-    def rm_model_in_memory_history(self, model, current_time):
-        last_index = len(self.GPU_memory_models_history) - 1
-        # 0. base case: shouldn't happen
-        if last_index == -1:
-            AssertionError("rm model cached location to an empty list")
-            return
-        # 1. find the place to add this remove_event to the tuple list
-        while last_index >= 0:
-            if self.GPU_memory_models_history[last_index][0] == current_time:
-                if model in self.GPU_memory_models_history[last_index][1]:
-                    self.GPU_memory_models_history[last_index][1].remove(model)
-                break
-            if self.GPU_memory_models_history[last_index][0] < current_time:
-                if model in self.GPU_memory_models_history[last_index][1]:
-                    next_tasks_in_memory = self.GPU_memory_models_history[last_index][1].copy(
-                    )
-                    next_tasks_in_memory.remove(model)
-                    last_index = last_index + 1
-                    self.GPU_memory_models_history.insert(
-                        last_index, (current_time, next_tasks_in_memory)
-                    )
-                break
-            last_index -= 1  # go to prev time
-        # 2. remove the task from all the subsequent tuple
-        while last_index < len(self.GPU_memory_models_history):
-            if model in self.GPU_memory_models_history[last_index]:
-                self.GPU_memory_models_history[last_index][1].remove(model)
-            last_index += 1  # do this for the remaining element after
-
     def get_history(self, history, current_time, info_staleness) -> list:
         delayed_time = current_time - info_staleness
         last_index = len(history) - 1
@@ -253,11 +138,4 @@ def get_history(self, history, current_time, info_staleness) -> list:
             if history[last_index][0] <= delayed_time:
                 return history[last_index][1].copy()
             last_index -= 1  # check the previous one
-        return []
-
-    def get_model_history(self, current_time, info_staleness=0, requiring_workerid= None) -> list:
-        if requiring_workerid == self.worker_id:
-            info_staleness = 0
-        return self.get_history(self.GPU_memory_models_history, current_time, info_staleness)
-
-
+        return []
\ No newline at end of file

From 84e1c0d92d03ec10569907876e1e0bc1302af976 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 10:48:22 -0400
Subject: [PATCH 35/41] cache logic update for gpu state

---
 schedulers/algo/nav_heft_algo.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py
index 1f2e2e3..51dac17 100644
--- a/schedulers/algo/nav_heft_algo.py
+++ b/schedulers/algo/nav_heft_algo.py
@@ -98,9 +98,9 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
         
         available_memory = GPU_MEMORY_SIZE
         if consider_cache:
-            available_memory = workers[worker_id].used_GPUmemory(current_time, \
-                                                                 info_staleness=PLACEMENT_INFORMATION_STALENESS, \
-                                                                 requiring_worker_id=initial_worker_id)
+            available_memory = workers[worker_id].GPU_state.available_memory(current_time)
+            # TODO: info staleness is not applied here; GPU_state.available_memory takes only a time
+            # (previously: info_staleness=PLACEMENT_INFORMATION_STALENESS, requiring_worker_id=initial_worker_id)
         workers_available_memory[worker_id] = available_memory
         
     # Select the best worker for each task based on their ranking from high to low
@@ -128,13 +128,12 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co
             model_fetch_time = 0
             cur_fetching_model_size = 0
             if consider_cache:
-                models_in_cur_worker = workers[cur_worker_id].get_model_history(current_time, \
-                                                                             info_staleness=PLACEMENT_INFORMATION_STALENESS, \
-                                                                             requiring_workerid= initial_worker_id)
-                if cur_task.model is not None and cur_task.model not in models_in_cur_worker:
+                # TODO: info staleness
+                if cur_task.model is not None and \
+                    not workers[cur_worker_id].GPU_state.does_have_idle_copy(cur_task.model, current_time):
                     model_fetch_time = SameMachineCPUtoGPU_delay(cur_task.model.model_size)
                     cur_fetching_model_size = cur_task.model.model_size
-                    if workers_available_memory[cur_worker_id] + cur_task.model.model_size > GPU_MEMORY_SIZE:
+                    if not workers[cur_worker_id].GPU_state.can_fetch_model(cur_task.model, current_time):
                         # double model fetch time due to the overhead from model_eviction
                         model_fetch_time += model_fetch_time
             cur_earliest_start_time += model_fetch_time

From 86566d111419465a1cd8ff70b2fbcf7fa4b81603 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 10:49:20 -0400
Subject: [PATCH 36/41] remove batch start

---
 core/events.py | 81 +-------------------------------------------------
 1 file changed, 1 insertion(+), 80 deletions(-)

diff --git a/core/events.py b/core/events.py
index 3bb6202..c815a90 100644
--- a/core/events.py
+++ b/core/events.py
@@ -150,57 +150,6 @@ def run(self, current_time):
     def to_string(self):
         return "[Intermediate Results Arrival]: worker:" + str(self.worker.worker_id) + ", prev_task_id:" + str(self.prev_task.task_id) + ", cur_task_id:" + str(self.cur_task.task_id)
 
-
-class TaskEndEvent(Event):
-    """ Event to signify that a TASK has been performed by the WORKER. """
-
-    def __init__(self, worker, job_id=-1, task_id=-1):
-        self.worker = worker
-        self.job_id = job_id    # integer representing the job_id
-        self.task_id = task_id  # integer representing the task_id
-
-    def run(self, current_time):
-        return self.worker.free_slot(current_time, self)
-
-    def to_string(self):
-        return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id)
-
-
-class BatchStartEvent(Event):
-    """ 
-        Event to signify that a BATCH has started executing in WORKER.
-        Only for logging purposes.
-    """
-
-    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
-        self.worker = worker
-        self.job_ids = job_ids      # list[int] with the job_ids in the batch
-        self.task_type = task_type  # (workflow_id, task_id) identifying the batch task_type
-
-    def run(self, current_time):
-        return []
-
-    def to_string(self):
-        jobs = ",".join([str(id) for id in self.job_ids])
-        return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
-
-
-class BatchEndEvent(Event):
-    """ Event to signify that a BATCH has been performed by the WORKER. """
-
-    def __init__(self, worker, job_ids=[], task_type=(-1, -1)):
-        self.worker = worker
-        self.job_ids = job_ids      # list[int] with the job_ids in the batch
-        self.task_type = task_type  # (workflow_id, task_id) identifying the batch task_type
-
-    def run(self, current_time):
-        return []
-
-    def to_string(self):
-        jobs = ",".join([str(id) for id in self.job_ids])
-        return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"
-
-
 class BatchEndEvent(Event):
     """ Event to signify that a BATCH has been performed by the WORKER. """
 
@@ -211,7 +160,7 @@ def __init__(self, worker, model, job_ids=[], task_type=(-1, -1)):
         self.task_type = task_type # (workflow_id, task_id)
 
     def run(self, current_time):
-        return self.worker.free_slot(current_time, self.model)
+        return self.worker.free_slot(current_time, self.model, self.task_type)
 
     def to_string(self):
         jobs = ",".join([str(id) for id in self.job_ids])
@@ -251,34 +200,6 @@ def run(self, current_time):
     def to_string(self):
         return "[Job End] ==="
 
-
-class WorkerWakeUpEvent(Event):
-    """
-    Event to signify that max_wait_time has passed and worker should
-    check task queue.
-    """
-
-    def __init__(self, worker, task_id, task_max_wait_time):
-        self.worker = worker
-        self.task_id = task_id
-        self.task_max_wait_time = task_max_wait_time
-
-    def run(self, current_time):
-        if self.will_run(current_time):
-            return self.worker.maybe_start_task_for_type(
-                current_time, self.task_id, self.task_max_wait_time)
-        return []
-
-    def to_string(self):
-        return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]"
-    
-    def will_run(self, current_time):
-        # skip current wake up if a later wake up has been scheduled
-        if self.task_id in self.worker.next_check_times:
-            return current_time >= self.worker.next_check_times[self.task_id]
-        return True # if no batch has been run yet, wake up should be executed
-
-
 class EventOrders:
     """
     Used so that the Simulation keeps track of the priority queue order

From 9af0327a43c48632f67edc1b2c65c6d589d8d6f7 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 13:57:46 -0400
Subject: [PATCH 37/41] reserving GPU space for model fetching upon eviction

---
 workers/model_state.py | 65 ++++++++++++++++++++++++++++++++++--------
 workers/worker.py      | 46 ++++++++++++++++--------------
 2 files changed, 78 insertions(+), 33 deletions(-)

diff --git a/workers/model_state.py b/workers/model_state.py
index d6acefe..b0e71ce 100644
--- a/workers/model_state.py
+++ b/workers/model_state.py
@@ -6,12 +6,13 @@
 
 class ModelState:
     PLACED = 0
-    IN_FETCH = 1
-    IN_EVICT = 2
+    PRE_FETCH = 1 # reserved for a model that will be fetched
+    IN_FETCH = 2
+    IN_EVICT = 3
 
-    def __init__(self, model: Model, state: int, is_reserved_for_batch=True):
-        assert(state in [self.PLACED, self.IN_FETCH, self.IN_EVICT])
+    def __init__(self, model: Model, state: int, is_reserved_for_batch=True, size=0):
         self.model = model
+        self.size = size if size > 0 else model.model_size
         self.state = state
         self.is_reserved_for_batch = is_reserved_for_batch
 
@@ -19,7 +20,7 @@ def __eq__(self, value):
         return type(value) == ModelState and self.model == value.model and self.state == value.state
     
     def __str__(self):
-        return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id}>"
+        return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id if self.model else -1}>"
     
     def __repr__(self):
         return self.__str__()
@@ -28,6 +29,7 @@ def _state_to_str(self):
         if self.state == self.PLACED: return "Placed"
         elif self.state == self.IN_FETCH: return "Fetching"
         elif self.state == self.IN_EVICT: return "Evicting"
+        elif self.state == self.PRE_FETCH: return "Reserved"
 
 
 class GPUState(object):
@@ -38,15 +40,14 @@ def __init__(self):
     def reserved_memory(self, time: float) -> float:
         """
             Returns the total GPU memory that is currently in use, either
-            for currently placed models, models that are being fetched, or
-            models that are being evicted.
+            for currently placed models, models that are being fetched,
+            models that are being evicted, or models that will be fetched.
         """
-        return sum(state.model.model_size for state in self.state_at(time))
+        return sum(state.size for state in self.state_at(time))
     
     def available_memory(self, time: float) -> float:
         """
-            Returns total GPU memory not occupied by a model, reserved for
-            a model being fetched, or used by a model being evicted.
+            Returns total GPU memory that is not reserved (see reserved_memory).
         """
         return GPU_MEMORY_SIZE - self.reserved_memory(time)
 
@@ -64,7 +65,7 @@ def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool:
         """
         # cannot use space occupied by models currently being fetched/evicted or used
         return (self.available_memory(time) + \
-                sum(state.model.model_size for state in self.state_at(time) 
+                sum(state.size for state in self.state_at(time) 
                     if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size
     
     def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify):
@@ -122,7 +123,13 @@ def fetch_model(self, model: Model, start_time: float, fetch_time: float):
                                   lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None)
 
     
-    def evict_model(self, model: Model, start_time: float, evict_time: float):
+    def evict_model(self, model: Model, start_time: float, evict_time: float, reserve_until=-1):
+        """
+            Evicts [model] starting at [start_time] in [evict_time] time.
+            Reserves evicted space until [reserve_until]. This prevents other models
+            from being loaded in space that may be intended to fetch a specific model.
+            Does not reserve if [reserve_until] < 0.
+        """
         assert(model in self.placed_models(start_time))
 
         eviction_end_time = start_time + evict_time
@@ -147,7 +154,33 @@ def _begin_model_eviction(timestamp, states):
         # add eviction start marker
         self._insert_state_marker(start_time, _begin_model_eviction, 
                                   lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None)
+        
+        if reserve_until >= 0:
+            self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until)
+        
+    def reserve_model_space(self, model: Model, size: float, start_time: float, end_time: float):
+        """
+            Reserves [size] extra space for [model] from [start_time] to [end_time].
+            Used during evictions when additional space must be reserved in addition
+            to space from evicted or currently evicting models for when [model] is
+            fetched. Prevents other models from being fetched in space made for
+            [model].
+        """
+        assert(size > 0)
 
+        # mark reservation start
+        self._insert_state_marker(start_time,
+                                  lambda _, states: states.append(ModelState(model, ModelState.PRE_FETCH, size=size)),
+                                  lambda t, states: states.append(ModelState(model, ModelState.PRE_FETCH, size=size)) if t < end_time else None)
+        
+        def _remove_reservation(timestamp, states):
+            for state in states:
+                if state.model == model and state.state == ModelState.PRE_FETCH and state.size == size:
+                    states.remove(state)
+                    return
+
+        # mark reservation end
+        self._insert_state_marker(end_time, _remove_reservation, lambda _, states: None)
 
     def state_at(self, time: float) -> list[ModelState]:
         for (timestamp, states) in self._model_states[::-1]:
@@ -168,6 +201,11 @@ def does_have_idle_copy(self, model: Model, time: float) -> bool:
         return any(state.model == model and not state.is_reserved_for_batch for state in self.placed_model_states(time))
     
     def reserve_idle_copy(self, model: Model, time: float):
+        """
+            If there is an idle copy of [model], reserve it to execute a batch
+            starting from [time]. When execution finishes, a call to
+            [release_busy_copy] is required.
+        """
         assert(self.does_have_idle_copy(model, time))
 
         def _occupy_one_copy(timestamp, states):
@@ -183,6 +221,9 @@ def _occupy_one_copy(timestamp, states):
         self._insert_state_marker(time, _occupy_one_copy, _occupy_one_copy)
 
     def release_busy_copy(self, model: Model, time: float):
+        """
+            Releases a previously occupied/reserved copy of [model] at [time].
+        """
         def _release_one_copy(timestamp, states):
             for i, state in enumerate(states):
                 if state.model == model and state.state == ModelState.PLACED and state.is_reserved_for_batch:
diff --git a/workers/worker.py b/workers/worker.py
index 685b183..3a5a2ef 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -80,25 +80,6 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen
         """
         return []
 
-    def _evict_models_from_GPU(self, models_to_evict, current_time):
-        # NOTE: Assumes any number of models can be evicted concurrently!
-        eviction_duration = 0
-        for model in models_to_evict:
-            self.simulation.metadata_service.rm_model_cached_location(
-                model, self.worker_id, current_time)
-            
-            evict_time = SameMachineGPUtoCPU_delay(model.model_size)
-            self.GPU_state.evict_model(model, current_time, evict_time)
-            eviction_duration = max(evict_time, eviction_duration)
-
-            self.model_history_log.loc[len(self.model_history_log)] = {
-                "start_time": current_time,
-                "end_time": current_time + eviction_duration, 
-                "model_id": model.model_id,
-                "placed_or_evicted": "evicted"
-            }
-        return eviction_duration
-
     LOOKAHEAD_EVICTION = 0
     FCFS_EVICTION = 1
 
@@ -126,8 +107,31 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
                 curr_memory += state.model.model_size
                 models_to_evict.append(state.model)
                 if curr_memory >= min_required_memory:
-                    return self._evict_models_from_GPU(models_to_evict, current_time)
-
+                    # NOTE: Assumes models can be evicted concurrently
+                    model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict))
+                    eviction_duration = max(model_evict_times)
+                    full_eviction_end = current_time + eviction_duration
+
+                    # must reserve space to prevent other models from loading in space created here
+                    extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict)
+                    if extra_to_reserve > 0:
+                        self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end)
+
+                    for i in range(len(models_to_evict)):
+                        self.simulation.metadata_service.rm_model_cached_location(
+                            models_to_evict[i], self.worker_id, current_time)
+                        self.GPU_state.evict_model(models_to_evict[i], 
+                                                   current_time, 
+                                                   model_evict_times[i],
+                                                   reserve_until=full_eviction_end)
+                        
+                        self.model_history_log.loc[len(self.model_history_log)] = {
+                            "start_time": current_time,
+                            "end_time": current_time + eviction_duration, 
+                            "model_id": models_to_evict[i].model_id,
+                            "placed_or_evicted": "evicted"
+                        }
+                    return eviction_duration
         return 0
 
     # ------------------------- cached model history update helper functions ---------------

From 6b79f44d7ad2910e88ef20655321f98e26779e47 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 14:14:41 -0400
Subject: [PATCH 38/41] Remove WorkerWakeUpEvent check from event logging

---
 schedulers/decentralized/simulation_decentral.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py
index cc80ac5..104c537 100644
--- a/schedulers/decentralized/simulation_decentral.py
+++ b/schedulers/decentralized/simulation_decentral.py
@@ -51,8 +51,7 @@ def run(self):
         while self.remaining_jobs > 0:
             cur_event = self.event_queue.get()
 
-            if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time):
-                self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
+            self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()]
 
             assert cur_event.current_time >= last_time
             last_time = cur_event.current_time
@@ -60,6 +59,4 @@ def run(self):
             for new_event in new_events:
                 last_time = cur_event.current_time
                 self.event_queue.put(new_event)
-        self.run_finish(last_time, by_job_type=True)
-        
-        
+        self.run_finish(last_time, by_job_type=True)
\ No newline at end of file

From 0120870259e6c31a75231ad3bec1fd184b556723 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Tue, 17 Jun 2025 14:17:08 -0400
Subject: [PATCH 39/41] Fix nested quotes in ModelState.__str__ f-string

---
 workers/model_state.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workers/model_state.py b/workers/model_state.py
index b0e71ce..c09090f 100644
--- a/workers/model_state.py
+++ b/workers/model_state.py
@@ -20,7 +20,7 @@ def __eq__(self, value):
         return type(value) == ModelState and self.model == value.model and self.state == value.state
     
     def __str__(self):
-        return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id if self.model else -1}>"
+        return f"<[{self._state_to_str()}] [{'NOT ' if not self.is_reserved_for_batch else ''}IN USE] Model ID: {self.model.model_id if self.model else -1}>"
     
     def __repr__(self):
         return self.__str__()

From 26076cba35f0831173e93cb004a500f5f17fbe2f Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 18 Jun 2025 08:09:58 -0400
Subject: [PATCH 40/41] Disallow loading additional copies of a model already on the GPU

---
 workers/taskworker.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/workers/taskworker.py b/workers/taskworker.py
index 24a02a6..9cc350e 100644
--- a/workers/taskworker.py
+++ b/workers/taskworker.py
@@ -105,6 +105,10 @@ def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> i
         if model == None or self.GPU_state.does_have_idle_copy(model, current_time):
             return self._CAN_RUN_NOW
         
+        # cannot load additional copies of the same model
+        if any(map(lambda s: s.model == model, self.GPU_state.state_at(current_time))):
+            return self._CANNOT_RUN
+        
         if self.GPU_state.can_fetch_model(model, current_time):
             return self._CAN_RUN_NOW
         

From f268dc226b568cac77ce86617413638b3f776670 Mon Sep 17 00:00:00 2001
From: Tami Takada <tamitakada@gmail.com>
Date: Wed, 18 Jun 2025 08:11:10 -0400
Subject: [PATCH 41/41] Make model eviction instantaneous (no eviction time)

---
 workers/model_state.py | 30 +++++++++++++++---------------
 workers/worker.py      | 28 ++++++++++++----------------
 2 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/workers/model_state.py b/workers/model_state.py
index c09090f..cfdd764 100644
--- a/workers/model_state.py
+++ b/workers/model_state.py
@@ -65,7 +65,7 @@ def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool:
         """
         # cannot use space occupied by models currently being fetched/evicted or used
         return (self.available_memory(time) + \
-                sum(state.size for state in self.state_at(time) 
+                sum(state.size for state in self.state_at(time)
                     if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size
     
     def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify):
@@ -113,12 +113,12 @@ def fetch_model(self, model: Model, start_time: float, fetch_time: float):
             return
         
         # add fetch end marker
-        self._insert_state_marker(fetch_end_time, 
+        self._insert_state_marker(fetch_end_time,
                                   lambda _, states: states.append(ModelState(model, ModelState.PLACED)),
                                   lambda _, states: states.append(ModelState(model, ModelState.PLACED)))
         
         # add fetch start marker
-        self._insert_state_marker(start_time, 
+        self._insert_state_marker(start_time,
                                   lambda _, states: states.append(ModelState(model, ModelState.IN_FETCH)),
                                   lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None)
 
@@ -144,19 +144,19 @@ def _remove_model(timestamp, states):
 
         self._insert_state_marker(eviction_end_time, _remove_model, _remove_model)
 
-        def _begin_model_eviction(timestamp, states):
-            for state in states:
-                if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch:
-                    state.state = ModelState.IN_EVICT
-                    return
-            assert(False) # should not happen: no model exists to evict
+        # def _begin_model_eviction(timestamp, states):
+        #     for state in states:
+        #         if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch:
+        #             state.state = ModelState.IN_EVICT
+        #             return
+        #     assert(False) # should not happen: no model exists to evict
 
         # add eviction start marker
-        self._insert_state_marker(start_time, _begin_model_eviction, 
-                                  lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None)
+        # self._insert_state_marker(start_time, _begin_model_eviction,
+        #                           lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None)
         
-        if reserve_until >= 0:
-            self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until)
+        # if reserve_until >= 0:
+        #     self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until)
         
     def reserve_model_space(self, model: Model, size: float, start_time: float, end_time: float):
         """
@@ -193,7 +193,7 @@ def placed_models(self, time: float) -> list[Model]:
     
     def placed_model_states(self, time: float) -> list[ModelState]:
         states = self.state_at(time)
-        if len(states) == 0: 
+        if len(states) == 0:
             return []
         return [state for state in states if state.state == ModelState.PLACED]
     
@@ -230,4 +230,4 @@ def _release_one_copy(timestamp, states):
                     states[i].is_reserved_for_batch = False
                     return
 
-        self._insert_state_marker(time, _release_one_copy, _release_one_copy)
\ No newline at end of file
+        self._insert_state_marker(time, _release_one_copy, _release_one_copy)
diff --git a/workers/worker.py b/workers/worker.py
index 3a5a2ef..5c1a9f7 100644
--- a/workers/worker.py
+++ b/workers/worker.py
@@ -65,7 +65,7 @@ def fetch_model(self, model, current_time):
         
         self.model_history_log.loc[len(self.model_history_log)] = {
             "start_time": current_time,
-            "end_time": current_time + fetch_time, 
+            "end_time": current_time + fetch_time,
             "model_id": model.model_id,
             "placed_or_evicted": "placed"
         }
@@ -96,7 +96,7 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
         if policy == self.LOOKAHEAD_EVICTION:
             next_models = self.get_next_models(3, current_time)
             placed_model_states = sorted(
-                placed_model_states, 
+                placed_model_states,
                 key=lambda m: next_models.index(m.model) if m.model in next_models else len(next_models),
                 reverse=True
             )
@@ -107,31 +107,27 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory:
                 curr_memory += state.model.model_size
                 models_to_evict.append(state.model)
                 if curr_memory >= min_required_memory:
-                    # NOTE: Assumes models can be evicted concurrently
-                    model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict))
-                    eviction_duration = max(model_evict_times)
-                    full_eviction_end = current_time + eviction_duration
+                    # model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict))
+                    # eviction_duration = max(model_evict_times)
+                    # full_eviction_end = current_time + eviction_duration
 
                     # must reserve space to prevent other models from loading in space created here
-                    extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict)
-                    if extra_to_reserve > 0:
-                        self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end)
+                    # extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict)
+                    # if extra_to_reserve > 0:
+                    #     self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end)
 
                     for i in range(len(models_to_evict)):
                         self.simulation.metadata_service.rm_model_cached_location(
                             models_to_evict[i], self.worker_id, current_time)
-                        self.GPU_state.evict_model(models_to_evict[i], 
-                                                   current_time, 
-                                                   model_evict_times[i],
-                                                   reserve_until=full_eviction_end)
+                        self.GPU_state.evict_model(models_to_evict[i], current_time, 0)
                         
                         self.model_history_log.loc[len(self.model_history_log)] = {
                             "start_time": current_time,
-                            "end_time": current_time + eviction_duration, 
+                            "end_time": current_time,
                             "model_id": models_to_evict[i].model_id,
                             "placed_or_evicted": "evicted"
                         }
-                    return eviction_duration
+                    return 0
         return 0
 
     # ------------------------- cached model history update helper functions ---------------
@@ -142,4 +138,4 @@ def get_history(self, history, current_time, info_staleness) -> list:
             if history[last_index][0] <= delayed_time:
                 return history[last_index][1].copy()
             last_index -= 1  # check the previous one
-        return []
\ No newline at end of file
+        return []