Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
4c35e87
wip batching implementation
az275 Jun 3, 2025
ff389c6
batching config
az275 Jun 3, 2025
18741b6
worker wake up after max wait time
tamitakada Jun 4, 2025
04fefb9
wake up event
tamitakada Jun 4, 2025
aa7eb1c
event logging
tamitakada Jun 4, 2025
d4ae0f1
batching
tamitakada Jun 4, 2025
b9dcf02
comments
tamitakada Jun 5, 2025
7eba498
wake up enqueue bug fix and comments
tamitakada Jun 5, 2025
cc2b1df
produce breakdown
tamitakada Jun 5, 2025
2e65671
start batch whenever possible
tamitakada Jun 6, 2025
c813826
separate tasks for each task type; updated queue wait time estimates
tamitakada Jun 6, 2025
5de0c95
per task type wait times
tamitakada Jun 6, 2025
c3a8e43
task type bug fix
tamitakada Jun 6, 2025
a1449ec
task type fix
tamitakada Jun 6, 2025
14b857d
event logging fix
tamitakada Jun 6, 2025
f8e40de
env vars setup script
tamitakada May 20, 2025
e497a17
dummy wf
tamitakada Jun 6, 2025
22dbc00
merge
tamitakada Jun 10, 2025
6243924
real workflow
tamitakada Jun 10, 2025
05c3c88
lookahead model eviction
tamitakada Jun 10, 2025
f9b8408
eviction policy fix
tamitakada Jun 11, 2025
ae44ac7
concurrent batch execution according to available GPU memory
tamitakada Jun 11, 2025
c85596f
add model data to track models in use
tamitakada Jun 11, 2025
3d5d72c
can fit helper fix
tamitakada Jun 12, 2025
00dc474
merge
tamitakada Jun 12, 2025
2c916e3
model logging & eviction fix
tamitakada Jun 12, 2025
b04b800
merge: job logging
tamitakada Jun 6, 2025
086256e
merge fix
tamitakada Jun 6, 2025
11a9557
merge: job logging
tamitakada May 20, 2025
e5b5516
plot model loading
tamitakada Jun 12, 2025
8547b8c
fetch fix and policy choice
tamitakada Jun 12, 2025
50fe476
policy choice
tamitakada Jun 12, 2025
adf3c70
fixed loading plot; added eviction plot
tamitakada Jun 12, 2025
70c0489
gpu state refactor for accurate allocation handling
tamitakada Jun 17, 2025
84e1c0d
cache logic update for gpu state
tamitakada Jun 17, 2025
86566d1
remove batch start
tamitakada Jun 17, 2025
9af0327
reserving GPU space for model fetching upon eviction
tamitakada Jun 17, 2025
6b79f44
removed wake up
tamitakada Jun 17, 2025
0120870
to str fix
tamitakada Jun 17, 2025
26076cb
cannot load duplicates
tamitakada Jun 18, 2025
f268dc2
no eviction time
tamitakada Jun 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions core/config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
""" -------- Worker Machines Parameters -------- """
GPU_MEMORY_SIZE = 14000000 # in KB, 15GB for Tesla T4
GPU_MEMORY_SIZE = 24000000 # in KB, 24GB for NVIDIA A30

TOTAL_NUM_OF_WORKERS = 140
TOTAL_NUM_OF_WORKERS = 50


""" -------- Workload Parameters -------- """
TOTAL_NUM_OF_JOBS = 1000
TOTAL_NUM_OF_JOBS = 10000

# The interval between two consecutive job creation events at each external client
DEFAULT_CREATION_INTERVAL_PERCLIENT = 100 # ms.
DEFAULT_CREATION_INTERVAL_PERCLIENT = 0.2 # ms.

WORKLOAD_DISTRIBUTION = "POISON" # UNIFORM | POISON | GAMMA

Expand All @@ -20,4 +20,4 @@

PLACEMENT_INFORMATION_STALENESS = 1 # in ms

RESCHEDULE_THREASHOLD = 1.5
RESCHEDULE_THREASHOLD = 1.5
18 changes: 9 additions & 9 deletions core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,21 @@ def run(self, current_time):
def to_string(self):
return "[Intermediate Results Arrival]: worker:" + str(self.worker.worker_id) + ", prev_task_id:" + str(self.prev_task.task_id) + ", cur_task_id:" + str(self.cur_task.task_id)

class BatchEndEvent(Event):
""" Event to signify that a BATCH has been performed by the WORKER. """

class TaskEndEvent(Event):
""" Event to signify that a TASK has been performed by the WORKER. """

def __init__(self, worker, job_id=-1, task_id=-1):
def __init__(self, worker, model, job_ids=[], task_type=(-1, -1)):
self.worker = worker
self.job_id = job_id # integer representing the job_id
self.task_id = task_id # integer representing the task_id
self.model = model
self.job_ids = job_ids # integers representing the job_ids
self.task_type = task_type # (workflow_id, task_id)

def run(self, current_time):
return self.worker.free_slot(current_time)
return self.worker.free_slot(current_time, self.model, self.task_type)

def to_string(self):
return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id)
jobs = ",".join([str(id) for id in self.job_ids])
return f"[Batch End (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]"


# for PER_JOB scheduler
Expand Down Expand Up @@ -199,7 +200,6 @@ def run(self, current_time):
def to_string(self):
return "[Job End] ==="


class EventOrders:
"""
Used so that the Simulation keeps track of the priority queue order
Expand Down
7 changes: 6 additions & 1 deletion core/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,15 @@ def job_generate_from_workflow(self):

current_task = Task(self.id, # ID of the associated unique Job
task_cfg["TASK_INDEX"], # taskID
(self.job_type_id, task_cfg["TASK_INDEX"]), # task type
task_cfg["EXECUTION_TIME"],
required_model_for_task,
task_cfg["INPUT_SIZE"],
task_cfg["OUTPUT_SIZE"])
task_cfg["OUTPUT_SIZE"],
task_cfg["MAX_BATCH_SIZE"],
task_cfg["MAX_WAIT_TIME"],
task_cfg["BATCH_SIZES"],
task_cfg["BATCH_EXEC_TIME"])

self.tasks.append(current_task)

Expand Down
17 changes: 10 additions & 7 deletions core/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
is referenced from Sparrow: https://github.com/radlab/sparrow
'''

import imp
import numpy as np
from matplotlib import pyplot as plt
from core.config import *
Expand Down Expand Up @@ -44,6 +43,9 @@ def __init__(
# Tracking measurements
self.result_to_export = pd.DataFrame()
self.tasks_logging_times = pd.DataFrame()
self.event_log = pd.DataFrame(columns=["time", "event"])
self.batch_exec_log = pd.DataFrame(columns=["time", "worker_id", "workflow_id", "task_id", "batch_size", "model_exec_time", "batch_exec_time", "job_ids"])

print("---- SIMULATION : " + self.simulation_name + "----")
self.produce_breakdown = produce_breakdown

Expand Down Expand Up @@ -104,8 +106,8 @@ def run_finish(self, last_time, by_job_type=False):
def produce_time_breakdown_results(self, completed_jobs):

dataframe = pd.DataFrame(columns=["job_id", "load_info_staleness", "placement_info_staleness", "req_inter_arrival_delay",
"workflow_type", "scheduler_type", "slowdown", "response_time"])
dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "time_to_buffer", "dependency_wait_time",
"workflow_type", "job_create_time", "scheduler_type", "slowdown", "response_time"])
dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "task_arrival_time", "task_start_exec_time", "time_to_buffer", "dependency_wait_time",
"time_spent_in_queue", "model_fetching_time", "execution_time"])

for index, completed_job in enumerate(completed_jobs):
Expand All @@ -120,7 +122,7 @@ def produce_time_breakdown_results(self, completed_jobs):
if "JOB_CREATION_INTERVAL" in WORKFLOW_LIST[completed_job.job_type_id]:
job_creation_interval = WORKFLOW_LIST[completed_job.job_type_id]["JOB_CREATION_INTERVAL"]
dataframe.loc[index] = [index, LOAD_INFORMATION_STALENESS, PLACEMENT_INFORMATION_STALENESS, job_creation_interval, completed_job.job_type_id,
self.simulation_name, slowdown, response_time]
completed_job.create_time, self.simulation_name, slowdown, response_time]

task_index = 0
for job in completed_jobs:
Expand All @@ -141,9 +143,10 @@ def produce_time_breakdown_results(self, completed_jobs):
assert model_fetching_time >= 0
assert execution_time >= 0

dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, time_to_buffer,
dependency_wait_time, time_spent_in_queue, model_fetching_time, execution_time]
dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, task.log.task_arrival_at_worker_buffer_timestamp,
task.log.task_execution_start_timestamp,time_to_buffer, dependency_wait_time,
time_spent_in_queue, model_fetching_time, execution_time]
task_index += 1

self.tasks_logging_times = dataframe_tasks_log
self.result_to_export = dataframe
self.result_to_export = dataframe
9 changes: 8 additions & 1 deletion core/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,23 @@


class Task(object):
def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size):
def __init__(self, job_id, task_id, task_type, task_exec_duration,
required_model, input_size, result_size, max_batch_size,
max_wait_time, batch_sizes, batch_exec_time):
self.job_id = job_id # id of the job the task belongs to
self.task_id = task_id # id of the task itself
self.task_type = task_type # (workflow_id, task_id)
# the time it takes to execute the task
self.task_exec_duration = task_exec_duration
# required model_id to execute the task. None if it is a computation task that doesn't involve ML model
self.model = required_model
# task input size to model.
self.input_size = input_size
self.result_size = result_size # output size
self.max_batch_size = max_batch_size
self.max_wait_time = max_wait_time
self.batch_sizes = batch_sizes
self.batch_exec_time = batch_exec_time
# list of Tasks (inputs) that this task requires ( list will be appended as the job generated)
self.required_task_ids = [] # list of task ids
self.next_task_ids = [] # list of task ids
Expand Down
Loading