From 4c35e871c4665543792f4dfc97c30762457baffc Mon Sep 17 00:00:00 2001 From: Ann Zhang Date: Tue, 3 Jun 2025 12:30:55 -0400 Subject: [PATCH 01/41] wip batching implementation --- core/job.py | 5 +- core/simulation.py | 1 - core/task.py | 5 +- core/workflow.py | 131 ++++++++++++++++++++++++++++-------------- workers/taskworker.py | 7 +++ 5 files changed, 103 insertions(+), 46 deletions(-) diff --git a/core/job.py b/core/job.py index c5b6ec3..aa8f9b8 100644 --- a/core/job.py +++ b/core/job.py @@ -84,7 +84,10 @@ def job_generate_from_workflow(self): task_cfg["EXECUTION_TIME"], required_model_for_task, task_cfg["INPUT_SIZE"], - task_cfg["OUTPUT_SIZE"]) + task_cfg["OUTPUT_SIZE"], + task_cfg["MAX_BATCH_SIZE"], + task_cfg["MAX_WAIT_TIME"], + task_cfg["SLOWDOWN_FACTOR"]) self.tasks.append(current_task) diff --git a/core/simulation.py b/core/simulation.py index 8aca6bd..93afe3a 100644 --- a/core/simulation.py +++ b/core/simulation.py @@ -3,7 +3,6 @@ is referenced from Sparrow: https://github.com/radlab/sparrow ''' -import imp import numpy as np from matplotlib import pyplot as plt from core.config import * diff --git a/core/task.py b/core/task.py index f97fdce..ba8ccff 100644 --- a/core/task.py +++ b/core/task.py @@ -2,7 +2,7 @@ class Task(object): - def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size): + def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, slowdown_factor): self.job_id = job_id # id of the job the task belongs to self.task_id = task_id # id of the task itself # the time it takes to execute the task @@ -12,6 +12,9 @@ def __init__(self, job_id, task_id, task_exec_duration, required_model, input_si # task input size to model. self.input_size = input_size self.result_size = result_size # output size + self.max_batch_size = max_batch_size + self.max_wait_time = max_wait_time + self.slowdown_factor = slowdown_factor # list of Tasks (inputs) that this task requires ( list will be appended as the job generated) self.required_task_ids = [] # list of task ids self.next_task_ids = [] # list of task ids diff --git a/core/workflow.py b/core/workflow.py index 4d5fd9f..e3c6b7c 100644 --- a/core/workflow.py +++ b/core/workflow.py @@ -9,52 +9,67 @@ "TASKS": [{"MODEL_NAME": "OPT", "MODEL_ID": 0, "TASK_INDEX": 0, - "PREV_TASK_INDEX": [], - "NEXT_TASK_INDEX": [1,2,3], - "MODEL_SIZE": 5720000, # in kB - "INPUT_SIZE": 1, - "OUTPUT_SIZE": 2, # in kB - "EXECUTION_TIME": 561 # avg time, in ms + "PREV_TASK_INDEX": [], + "NEXT_TASK_INDEX": [1,2,3], + "MODEL_SIZE": 5720000, # in kB + "INPUT_SIZE": 1, + "OUTPUT_SIZE": 2, # in kB + "EXECUTION_TIME": 561, # avg time, in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "marian", "MODEL_ID": 1, "TASK_INDEX": 1, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 800000, # in kB + "MODEL_SIZE": 800000, # in kB "INPUT_SIZE": 2, "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 441 # in ms + "EXECUTION_TIME": 441, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "mt5", "MODEL_ID": 2, "TASK_INDEX": 2, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 2000000, # in KB + "MODEL_SIZE": 2000000, # in KB "INPUT_SIZE": 2, "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 778 # in ms + "EXECUTION_TIME": 778, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "mt5", "MODEL_ID": 2, "TASK_INDEX": 3, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 2000000, # in KB + "MODEL_SIZE": 2000000, # in KB "INPUT_SIZE": 2, "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 803 # in ms + "EXECUTION_TIME": 803, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "", "MODEL_ID": -1, "TASK_INDEX": 4, "PREV_TASK_INDEX": [1,2,3], "NEXT_TASK_INDEX": [], - "MODEL_SIZE": 0, # in KB + "MODEL_SIZE": 0, # in KB "INPUT_SIZE": 2, "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 1 # in ms + "EXECUTION_TIME": 1, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, ] }, @@ -66,12 +81,15 @@ "TASKS": [{"MODEL_NAME": "OPT", "MODEL_ID": 0, "TASK_INDEX": 0, - "PREV_TASK_INDEX": [], - "NEXT_TASK_INDEX": [1], - "MODEL_SIZE": 5720000, # in kB - "INPUT_SIZE": 1, - "OUTPUT_SIZE": 2, # in kB - "EXECUTION_TIME": 560 # avg time, in ms + "PREV_TASK_INDEX": [], + "NEXT_TASK_INDEX": [1], + "MODEL_SIZE": 5720000, # in kB + "INPUT_SIZE": 1, + "OUTPUT_SIZE": 2, # in kB + "EXECUTION_TIME": 560, # avg time, in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, @@ -79,9 +97,12 @@ "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [], "MODEL_SIZE": 2140000, # in kB - "INPUT_SIZE": 1, + "INPUT_SIZE": 1, "OUTPUT_SIZE": 1, - "EXECUTION_TIME": 27 # in ms + "EXECUTION_TIME": 27, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time } ] }, @@ -94,40 +115,52 @@ "TASK_INDEX": 0, "PREV_TASK_INDEX": [], "NEXT_TASK_INDEX": [1,2], - "MODEL_SIZE": 1700000, # in kB - "INPUT_SIZE": 3000, # 224 x 224 x 3 shape, assuming 64 bits representation + "MODEL_SIZE": 1700000, # in kB + "INPUT_SIZE": 3000, # 224 x 224 x 3 shape, assuming 64 bits representation "OUTPUT_SIZE": 20, - "EXECUTION_TIME": 283 # avg time, in ms + "EXECUTION_TIME": 283, # avg time, in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, "TASK_INDEX": 1, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 2140000, # in kB - "INPUT_SIZE": 20, # 299×299, assuming 64 bits representation + "MODEL_SIZE": 2140000, # in kB + "INPUT_SIZE": 20, # 299×299, assuming 64 bits representation "OUTPUT_SIZE": 10, - "EXECUTION_TIME": 26 # in ms + "EXECUTION_TIME": 26, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "txt2speech", "MODEL_ID": 5, "TASK_INDEX": 2, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 2700000, # in kB + "MODEL_SIZE": 2700000, # in kB "INPUT_SIZE": 20, "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 76 # in ms + "EXECUTION_TIME": 76, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "aggregate", "MODEL_ID": -1, "TASK_INDEX": 3, "PREV_TASK_INDEX": [1,2], "NEXT_TASK_INDEX": [], - "MODEL_SIZE": -1, # in kB + "MODEL_SIZE": -1, # in kB "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 0.2 # in ms + "EXECUTION_TIME": 0.2, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time } ] }, @@ -140,40 +173,52 @@ "TASK_INDEX": 0, "PREV_TASK_INDEX": [], "NEXT_TASK_INDEX": [1,2], - "MODEL_SIZE": -1, # in kB - "INPUT_SIZE": 3000, + "MODEL_SIZE": -1, # in kB + "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 0.6 # avg time, in ms + "EXECUTION_TIME": 0.6, # avg time, in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "DETR", "MODEL_ID": 8, "TASK_INDEX": 1, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 1800000, # in kB - "INPUT_SIZE": 3000, # 299×299, assuming 64 bits representation - "OUTPUT_SIZE": 3000, # - "EXECUTION_TIME": 178 # in ms + "MODEL_SIZE": 1800000, # in kB + "INPUT_SIZE": 3000, # 299×299, assuming 64 bits representation + "OUTPUT_SIZE": 3000, + "EXECUTION_TIME": 178, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "Depth", "MODEL_ID": 9, "TASK_INDEX": 2, "PREV_TASK_INDEX": [0], "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 3900000, # in kB + "MODEL_SIZE": 3900000, # in kB "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 147 # in ms + "EXECUTION_TIME": 147, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time }, {"MODEL_NAME": "Aggregate", "MODEL_ID": -1, "TASK_INDEX": 3, "PREV_TASK_INDEX": [1,2], "NEXT_TASK_INDEX": [], - "MODEL_SIZE": -1, # in kB + "MODEL_SIZE": -1, # in kB "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 104 # in ms + "EXECUTION_TIME": 104, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 5000, # ms + "SLOWDOWN_FACTOR": 1.2 # batch execution time } ] }, diff --git a/workers/taskworker.py b/workers/taskworker.py index 73df922..e81a0eb 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -74,12 +74,19 @@ def maybe_start_task(self, current_time): if (current_time >= task.log.task_placed_on_worker_queue_timestamp): # if self.worker_id == 2: # print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id)) + + # execute batch subject to the following constraints: + # batch cannot exceed max_batch_size + # a task cannot wait longer than max_wait_time task_end_events, task_end_time = self.task_execute( task, current_time) self.rm_task_in_queue_history(task, current_time) break return task_end_events + # modify to handle a batch of tasks: + # need to model batch execution duration + # transfer to next step should handle a list of tasks def task_execute(self, task, current_time): self.involved = True self.num_free_slots -= 1 From ff389c61da6e538c4cd0467ad7dc30ad11238a3a Mon Sep 17 00:00:00 2001 From: Ann Zhang Date: Tue, 3 Jun 2025 15:55:35 -0400 Subject: [PATCH 02/41] batching config --- core/job.py | 3 ++- core/task.py | 5 +++-- core/workflow.py | 45 ++++++++++++++++++++++++++++--------------- workers/taskworker.py | 6 +++--- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/core/job.py b/core/job.py index aa8f9b8..6d9822f 100644 --- a/core/job.py +++ b/core/job.py @@ -87,7 +87,8 @@ def job_generate_from_workflow(self): task_cfg["OUTPUT_SIZE"], task_cfg["MAX_BATCH_SIZE"], task_cfg["MAX_WAIT_TIME"], - task_cfg["SLOWDOWN_FACTOR"]) + task_cfg["BATCH_SIZES"], + task_cfg["BATCH_EXEC_TIME"]) self.tasks.append(current_task) diff --git a/core/task.py b/core/task.py index ba8ccff..b490e52 100644 --- a/core/task.py +++ b/core/task.py @@ -2,7 +2,7 @@ class Task(object): - def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, slowdown_factor): + def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, batch_sizes, batch_exec_time): self.job_id = job_id # id of the job the task belongs to self.task_id = task_id # id of the task itself # the time it takes to execute the task @@ -14,7 +14,8 @@ def __init__(self, job_id, task_id, task_exec_duration, required_model, input_si self.result_size = result_size # output size self.max_batch_size = max_batch_size self.max_wait_time = max_wait_time - self.slowdown_factor = slowdown_factor + self.batch_sizes = batch_sizes + self.batch_exec_time = batch_exec_time # list of Tasks (inputs) that this task requires ( list will be appended as the job generated) self.required_task_ids = [] # list of task ids self.next_task_ids = [] # list of task ids diff --git a/core/workflow.py b/core/workflow.py index e3c6b7c..fc8f7a4 100644 --- a/core/workflow.py +++ b/core/workflow.py @@ -17,7 +17,8 @@ "EXECUTION_TIME": 561, # avg time, in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [561, 561, 561, 561, 561] }, {"MODEL_NAME": "marian", "MODEL_ID": 1, @@ -30,7 +31,8 @@ "EXECUTION_TIME": 441, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [441, 441, 441, 441, 441] }, {"MODEL_NAME": "mt5", "MODEL_ID": 2, @@ -43,7 +45,8 @@ "EXECUTION_TIME": 778, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [778, 778, 778, 778, 778] }, {"MODEL_NAME": "mt5", "MODEL_ID": 2, @@ -56,7 +59,8 @@ "EXECUTION_TIME": 803, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [803, 803, 803, 803, 803] }, {"MODEL_NAME": "", "MODEL_ID": -1, @@ -69,7 +73,8 @@ "EXECUTION_TIME": 1, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [1, 1, 1, 1, 1] }, ] }, @@ -89,7 +94,8 @@ "EXECUTION_TIME": 560, # avg time, in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [560, 560, 560, 560, 560] }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, @@ -102,7 +108,8 @@ "EXECUTION_TIME": 27, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [27, 27, 27, 27, 27] } ] }, @@ -121,7 +128,8 @@ "EXECUTION_TIME": 283, # avg time, in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [283, 283, 283, 283, 283] }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, @@ -134,7 +142,8 @@ "EXECUTION_TIME": 26, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [26, 26, 26, 26, 26] }, {"MODEL_NAME": "txt2speech", "MODEL_ID": 5, @@ -147,7 +156,8 @@ "EXECUTION_TIME": 76, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [76, 76, 76, 76, 76] }, {"MODEL_NAME": "aggregate", "MODEL_ID": -1, @@ -160,7 +170,8 @@ "EXECUTION_TIME": 0.2, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2] } ] }, @@ -179,7 +190,8 @@ "EXECUTION_TIME": 0.6, # avg time, in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6] }, {"MODEL_NAME": "DETR", "MODEL_ID": 8, @@ -192,7 +204,8 @@ "EXECUTION_TIME": 178, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [178, 178, 178, 178, 178] }, {"MODEL_NAME": "Depth", "MODEL_ID": 9, @@ -205,7 +218,8 @@ "EXECUTION_TIME": 147, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [147, 147, 147, 147, 147] }, {"MODEL_NAME": "Aggregate", "MODEL_ID": -1, @@ -218,7 +232,8 @@ "EXECUTION_TIME": 104, # in ms "MAX_BATCH_SIZE": 16, "MAX_WAIT_TIME": 5000, # ms - "SLOWDOWN_FACTOR": 1.2 # batch execution time + "BATCH_SIZES": [1, 2, 4, 8, 16], + "BATCH_EXEC_TIME": [104, 104, 104, 104, 104] } ] }, diff --git a/workers/taskworker.py b/workers/taskworker.py index e81a0eb..879d02b 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -63,6 +63,8 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- + # new event for modeling max_wait_time + # wake up thread in intervals of no more than max_wait_time def maybe_start_task(self, current_time): task_end_events = [] task_list = self.get_queue_history(current_time, info_staleness=0) @@ -75,9 +77,7 @@ def maybe_start_task(self, current_time): # if self.worker_id == 2: # print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id)) - # execute batch subject to the following constraints: - # batch cannot exceed max_batch_size - # a task cannot wait longer than max_wait_time + # form and execute batch task_end_events, task_end_time = self.task_execute( task, current_time) self.rm_task_in_queue_history(task, current_time) From 18741b6e613b741fdc1cbbe07821871ff79c0b23 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 4 Jun 2025 10:02:16 -0400 Subject: [PATCH 03/41] worker wake up after max wait time --- workers/taskworker.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/workers/taskworker.py b/workers/taskworker.py index 879d02b..489c426 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -4,6 +4,7 @@ from core.network import * from core.events import * from schedulers.algo.nav_heft_algo import * +import time class TaskWorker(Worker): @@ -14,6 +15,7 @@ def __init__(self, simulation, num_free_slots, worker_id): # keep track of the queue information at time: [ (time1,[task0,task1,]), (time2,[task1,...]),...] self.queue_history = [] self.involved = False + self.last_batch_end_time = None def add_task(self, current_time, task): """ @@ -66,6 +68,8 @@ def schedule_job_heft(self, current_time, job): # new event for modeling max_wait_time # wake up thread in intervals of no more than max_wait_time def maybe_start_task(self, current_time): + latest_time = current_time + task_end_events = [] task_list = self.get_queue_history(current_time, info_staleness=0) # print(task_list) @@ -80,8 +84,20 @@ def maybe_start_task(self, current_time): # form and execute batch task_end_events, task_end_time = self.task_execute( task, current_time) + latest_time = max(latest_time, task_end_time) # update worker time for wake up self.rm_task_in_queue_history(task, current_time) break + + self.last_batch_end_time = latest_time + + # print(current_time) + self.simulation.event_queue.put( + EventOrders( + latest_time + WorkerWakeUpEvent.MAX_WAIT_TIME, + WorkerWakeUpEvent(self) + ) + ) + return task_end_events # modify to handle a batch of tasks: @@ -103,6 +119,9 @@ def task_execute(self, task, current_time): task.log.task_front_queue_timestamp = current_time task.log.task_execution_start_timestamp = current_time + model_fetch_time task.log.task_execution_end_timestamp = task_end_time + + # print(f"curr: {current_time}, end: {task_end_time}") + return task_end_events, task_end_time # --------------------------- Subsequent TASK Transfer -------------------- From 04fefb905e72732207c9b0a038ead1b6478a973e Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 4 Jun 2025 10:02:42 -0400 Subject: [PATCH 04/41] wake up event --- core/events.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/core/events.py b/core/events.py index abef882..5320118 100644 --- a/core/events.py +++ b/core/events.py @@ -200,6 +200,30 @@ def to_string(self): return "[Job End] ===" +class WorkerWakeUpEvent(Event): + """ + Event to signify that max_wait_time has passed and worker should + check task queue. + """ + + # TODO: Get max wait time + MAX_WAIT_TIME = 50 # ms + + def __init__(self, worker): + self.worker = worker + + def run(self, current_time): + if self.will_run(current_time): + return self.worker.maybe_start_task(current_time) + return [] + + def to_string(self): + return f"[Worker (id: {self.worker.worker_id}) Wake Up]" + + def will_run(self, current_time): + return (self.worker.last_batch_end_time + self.MAX_WAIT_TIME) == current_time + + class EventOrders: """ Used so that the Simulation keeps track of the priority queue order From aa7eb1cd99effd7168fec9fec78122a8b3e38d5c Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 4 Jun 2025 10:03:27 -0400 Subject: [PATCH 05/41] event logging --- core/simulation.py | 2 ++ experiments/run_experiments.py | 10 ++++++++++ schedulers/centralized/simulation_central.py | 4 ++++ schedulers/decentralized/simulation_decentral.py | 6 ++++++ 4 files changed, 22 insertions(+) diff --git a/core/simulation.py b/core/simulation.py index 93afe3a..22b3f1b 100644 --- a/core/simulation.py +++ b/core/simulation.py @@ -43,6 +43,8 @@ def __init__( # Tracking measurements self.result_to_export = pd.DataFrame() self.tasks_logging_times = pd.DataFrame() + self.event_log = pd.DataFrame(columns=["time", "event"]) + print("---- SIMULATION : " + self.simulation_name + "----") self.produce_breakdown = produce_breakdown diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index 7630b43..6fe6034 100644 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -51,6 +51,9 @@ num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list) sim.run() + event_log = sim.event_log + event_log.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "events_by_time.csv") + # result_to_export = sim.result_to_export tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "loadDelay_" + str( @@ -61,6 +64,9 @@ sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK", num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list) sim.run() + + event_log = sim.event_log + event_log.to_csv(OUTPUT_FILE_NAMES["hashtask"] + "events_by_time.csv") tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["hashtask"] + "loadDelay_" + str( @@ -80,6 +86,10 @@ # dataframe = sim.result_to_export # dataframe.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str( # LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv") + + event_log = sim.event_log + event_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "events_by_time.csv") + tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str( LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv") diff --git a/schedulers/centralized/simulation_central.py b/schedulers/centralized/simulation_central.py index 13def63..3754077 100644 --- a/schedulers/centralized/simulation_central.py +++ b/schedulers/centralized/simulation_central.py @@ -52,6 +52,10 @@ def run(self): last_time = 0 while self.remaining_jobs > 0: cur_event = self.event_queue.get() + + if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time): + self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] + assert cur_event.current_time >= last_time last_time = cur_event.current_time new_events = cur_event.event.run(cur_event.current_time) diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py index b716d7e..0ba0f22 100644 --- a/schedulers/decentralized/simulation_decentral.py +++ b/schedulers/decentralized/simulation_decentral.py @@ -1,3 +1,5 @@ +import pandas as pd + from queue import PriorityQueue from core.simulation import * @@ -48,6 +50,10 @@ def run(self): last_time = 0 while self.remaining_jobs > 0: cur_event = self.event_queue.get() + + if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time): + self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] + assert cur_event.current_time >= last_time last_time = cur_event.current_time new_events = cur_event.event.run(cur_event.current_time) From d4ae0f1faa5d0650bd0963400e78316d581559e2 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 4 Jun 2025 17:08:38 -0400 Subject: [PATCH 06/41] batching --- core/events.py | 43 +++++++++++--- workers/taskworker.py | 131 ++++++++++++++++++++++++++++++------------ 2 files changed, 127 insertions(+), 47 deletions(-) diff --git a/core/events.py b/core/events.py index 5320118..d7515bd 100644 --- a/core/events.py +++ b/core/events.py @@ -166,6 +166,22 @@ def to_string(self): return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id) +class BatchEndEvent(Event): + """ Event to signify that a BATCH has been performed by the WORKER. """ + + def __init__(self, worker, job_ids=[], task_id=-1): + self.worker = worker + self.job_ids = job_ids # integers representing the job_ids + self.task_id = task_id # integer representing the task_id + + def run(self, current_time): + return self.worker.free_slot(current_time) + + def to_string(self): + jobs = ",".join([str(id) for id in self.job_ids]) + return f"[Batch End (Task {self.task_id}, Jobs {jobs}) at Worker {self.worker.worker_id}]" + + # for PER_JOB scheduler class JobAssignEvent(Event): """ @@ -206,22 +222,31 @@ class WorkerWakeUpEvent(Event): check task queue. """ - # TODO: Get max wait time - MAX_WAIT_TIME = 50 # ms - - def __init__(self, worker): + def __init__(self, worker, task_id, task_max_wait_time): self.worker = worker + self.task_id = task_id + self.task_max_wait_time = task_max_wait_time def run(self, current_time): - if self.will_run(current_time): - return self.worker.maybe_start_task(current_time) - return [] + # print(f"RUN: {self.will_run(current_time)}") + # if not self.will_run(current_time): + # print(self.worker.last_queue_check_times) + # print(f"EXPECT: {self.worker.last_queue_check_times[self.task_id] + self.task_max_wait_time}; ONLY AT {current_time}") + + # if self.will_run(current_time): + _, task_end_events = self.worker.maybe_start_task_for_type( + current_time, self.task_id, self.task_max_wait_time, True + ) + return task_end_events + # return [] def to_string(self): - return f"[Worker (id: {self.worker.worker_id}) Wake Up]" + return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]" def will_run(self, current_time): - return (self.worker.last_batch_end_time + self.MAX_WAIT_TIME) == current_time + if self.task_id in self.worker.next_check_times: + return current_time >= self.worker.next_check_times[self.task_id] + return True # if no batch has been run yet, wake up should be executed class EventOrders: diff --git a/workers/taskworker.py b/workers/taskworker.py index 489c426..c49e1cc 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -15,21 +15,25 @@ def __init__(self, simulation, num_free_slots, worker_id): # keep track of the queue information at time: [ (time1,[task0,task1,]), (time2,[task1,...]),...] self.queue_history = [] self.involved = False - self.last_batch_end_time = None + self.next_check_times = {} def add_task(self, current_time, task): """ Add task into the local task queue """ + + # print(f"[{current_time}] W{self.worker_id}: T{task.task_id} arrived") + # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) self.add_task_to_queue_history(task, current_time) - return self.maybe_start_task(current_time) + _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time, False) + return task_end_events def free_slot(self, current_time): """ Frees a slot on the worker and attempts to launch another task in that slot. """ self.num_free_slots += 1 - get_task_events = self.maybe_start_task(current_time) + get_task_events = self.maybe_start_task_any(current_time) return get_task_events # --------------------------- DECENTRALIZED WORKER SCHEDULING ---------------------- @@ -65,62 +69,113 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- - # new event for modeling max_wait_time - # wake up thread in intervals of no more than max_wait_time - def maybe_start_task(self, current_time): + def maybe_start_task_any(self, current_time): + task_list = self.get_queue_history(current_time, info_staleness=0) + + queued_tasks = queue.Queue() + [queued_tasks.put(task) for task in task_list] + while (not queued_tasks.empty()) and self.num_free_slots > 0: + task = queued_tasks.get() + if (current_time >= task.log.task_placed_on_worker_queue_timestamp): + did_exec_batch, task_end_events = self.maybe_start_task_for_type( + current_time, task.task_id, task.max_wait_time, False + ) + if did_exec_batch: + return task_end_events + # keep checking queue until batch is executed or tasks run out + + # if no queued tasks, maybe is never called and no wake up events are + # appended; assume in this case worker will be woken up when any new + # task arrives + return [] + + + def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]: + """ + Returns did_exec_batch : bool, task_end_events : list[Event] + """ latest_time = current_time + did_exec_batch = False task_end_events = [] - task_list = self.get_queue_history(current_time, info_staleness=0) - # print(task_list) + task_list = [task for task in self.get_queue_history(current_time, info_staleness=0) + if task.task_id == task_type] + queued_tasks = queue.Queue() [queued_tasks.put(task) for task in task_list] - while (not queued_tasks.empty()) and self.num_free_slots > 0: + + batch = [] + while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size: task = queued_tasks.get() if (current_time >= task.log.task_placed_on_worker_queue_timestamp): - # if self.worker_id == 2: - # print("time{}, exec_task {}. job_start_time: {}, job_type: {} ".format(current_time, task, self.simulation.jobs[task.job_id].create_time, self.simulation.jobs[task.job_id].job_type_id)) + batch.append(task) + + # full batch or max wait time has passed + if len(task_list) > 0 and self.num_free_slots > 0 \ + and (do_exec_batch or len(batch) >= task_list[0].max_batch_size): - # form and execute batch - task_end_events, task_end_time = self.task_execute( - task, current_time) - latest_time = max(latest_time, task_end_time) # update worker time for wake up + batch_end_events, task_end_time = self.batch_execute( + batch, current_time) + + # rm all tasks in batch + for task in batch: self.rm_task_in_queue_history(task, current_time) - break - self.last_batch_end_time = latest_time + latest_time = task_end_time - # print(current_time) - self.simulation.event_queue.put( + did_exec_batch = True + task_end_events += batch_end_events + + next_check_time = latest_time + task_wait_time + + # if idle, check again in wait time + task_end_events.append( EventOrders( - latest_time + WorkerWakeUpEvent.MAX_WAIT_TIME, - WorkerWakeUpEvent(self) + next_check_time, + WorkerWakeUpEvent(self, task_type, task_wait_time) ) ) + self.next_check_times[task_type] = next_check_time - return task_end_events + return did_exec_batch, task_end_events # modify to handle a batch of tasks: # need to model batch execution duration # transfer to next step should handle a list of tasks - def task_execute(self, task, current_time): + def batch_execute(self, tasks, current_time): self.involved = True self.num_free_slots -= 1 - model_fetch_time = self.fetch_model(task.model, current_time) - task_end_time = current_time + model_fetch_time + task.task_exec_duration - events = self.send_result_to_next_workers( - task_end_time, task) - task_end_events = events - task_end_events.append(EventOrders(task_end_time, TaskEndEvent( - self, job_id=task.job_id, task_id=task.task_id))) - self.simulation.add_job_completion_time( - task.job_id, task.task_id, task_end_time) - # task log tracking - task.log.task_front_queue_timestamp = current_time - task.log.task_execution_start_timestamp = current_time + model_fetch_time - task.log.task_execution_end_timestamp = task_end_time - - # print(f"curr: {current_time}, end: {task_end_time}") + model_fetch_time = self.fetch_model(tasks[0].model, current_time) + + batch_index = 0 + for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): # assumes batch_sizes are sorted + if len(tasks) <= batch_size: + batch_index = i + break + + task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index] + task_end_events = [] + + job_ids = [] + + for task in tasks: + events = self.send_result_to_next_workers( + task_end_time, task) + task_end_events += events + + self.simulation.add_job_completion_time( + task.job_id, task.task_id, task_end_time) + + job_ids.append(task.job_id) + + # task log tracking + task.log.task_front_queue_timestamp = current_time + task.log.task_execution_start_timestamp = current_time + model_fetch_time + task.log.task_execution_end_timestamp = task_end_time + + task_end_events.append(EventOrders(task_end_time, BatchEndEvent( + self, job_ids=job_ids, task_id=tasks[0].task_id + ))) return task_end_events, task_end_time From b9dcf0265616ad600cbae414371ad9731d3102e9 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 5 Jun 2025 10:49:40 -0400 Subject: [PATCH 07/41] comments --- core/events.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/core/events.py b/core/events.py index d7515bd..db35834 100644 --- a/core/events.py +++ b/core/events.py @@ -228,22 +228,18 @@ def __init__(self, worker, task_id, task_max_wait_time): self.task_max_wait_time = task_max_wait_time def run(self, current_time): - # print(f"RUN: {self.will_run(current_time)}") - # if not self.will_run(current_time): - # print(self.worker.last_queue_check_times) - # print(f"EXPECT: {self.worker.last_queue_check_times[self.task_id] + self.task_max_wait_time}; ONLY AT {current_time}") - - # if self.will_run(current_time): - _, task_end_events = self.worker.maybe_start_task_for_type( - current_time, self.task_id, self.task_max_wait_time, True - ) - return task_end_events - # return [] + if self.will_run(current_time): + _, task_end_events = self.worker.maybe_start_task_for_type( + current_time, self.task_id, self.task_max_wait_time, True + ) + return task_end_events + return [] def to_string(self): return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]" def will_run(self, current_time): + # skip current wake up if a later wake up has been scheduled if self.task_id in self.worker.next_check_times: return current_time >= self.worker.next_check_times[self.task_id] return True # if no batch has been run yet, wake up should be executed From 7eba498553466121888b7ebae3777c590fa79bfd Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 5 Jun 2025 10:55:10 -0400 Subject: [PATCH 08/41] wake up enqueue bug fix and comments --- workers/taskworker.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/workers/taskworker.py b/workers/taskworker.py index c49e1cc..6e514a1 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -85,13 +85,15 @@ def maybe_start_task_any(self, current_time): # keep checking queue until batch is executed or tasks run out # if no queued tasks, maybe is never called and no wake up events are - # appended; assume in this case worker will be woken up when any new - # task arrives + # appended; in this case worker sleeps until a new task arrives return [] def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]: """ + Execute a batch if 1) a batch of size max_batch_size can be created or 2) do_exec_batch is True + (do_exec_batch should be True when maybe is called by a wake up event) + Returns did_exec_batch : bool, task_end_events : list[Event] """ latest_time = current_time @@ -104,6 +106,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_ queued_tasks = queue.Queue() [queued_tasks.put(task) for task in task_list] + # form largest batch < max_batch_size possible batch = [] while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size: task = queued_tasks.get() @@ -126,16 +129,19 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_ did_exec_batch = True task_end_events += batch_end_events + # track next wake up time so old wake ups can be skipped next_check_time = latest_time + task_wait_time + self.next_check_times[task_type] = next_check_time # if idle, check again in wait time - task_end_events.append( + # NOTE: for some reason, appending to task_end_events does not always + # lead to event being enqueued; thus we enqueue directly to sim queue here + self.simulation.event_queue.put( EventOrders( next_check_time, WorkerWakeUpEvent(self, task_type, task_wait_time) ) ) - self.next_check_times[task_type] = next_check_time return did_exec_batch, task_end_events @@ -148,15 +154,15 @@ def batch_execute(self, tasks, current_time): model_fetch_time = self.fetch_model(tasks[0].model, current_time) batch_index = 0 - for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): # assumes batch_sizes are sorted - if len(tasks) <= batch_size: + for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): + if len(tasks) <= batch_size: # choose smallest batch size > len(tasks) batch_index = i break task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index] task_end_events = [] - job_ids = [] + job_ids = [] # for logging for task in tasks: events = self.send_result_to_next_workers( From cc2b1df779cc7b98d50dde6d0a451112e489b77a Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 5 Jun 2025 10:55:30 -0400 Subject: [PATCH 09/41] produce breakdown --- experiments/run_experiments.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index 6fe6034..a14154e 100644 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -48,7 +48,8 @@ # 2. Run and collect data if "centralheft" in experiment_schedulers: sim = Simulation_central(simulation_name="centralheft", job_split="PER_TASK", - num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list) + num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list, + produce_breakdown=True) sim.run() event_log = sim.event_log @@ -62,7 +63,8 @@ if "hashtask" in experiment_schedulers: OUTPUT_FILENAME = "hashtask" sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK", - num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list) + num_workers=TOTAL_NUM_OF_WORKERS, job_types_list=plotting_job_type_list, + produce_breakdown=True) sim.run() event_log = sim.event_log From 2e656717978a5b14e1860406b8bdb38c376c5925 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 08:16:52 -0400 Subject: [PATCH 10/41] start batch whenever possible --- core/events.py | 2 +- workers/taskworker.py | 13 +++++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/core/events.py b/core/events.py index db35834..269b9e5 100644 --- a/core/events.py +++ b/core/events.py @@ -230,7 +230,7 @@ def __init__(self, worker, task_id, task_max_wait_time): def run(self, current_time): if self.will_run(current_time): _, task_end_events = self.worker.maybe_start_task_for_type( - current_time, self.task_id, self.task_max_wait_time, True + current_time, self.task_id, self.task_max_wait_time ) return task_end_events return [] diff --git a/workers/taskworker.py b/workers/taskworker.py index 6e514a1..43a3434 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -27,7 +27,7 @@ def add_task(self, current_time, task): # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) self.add_task_to_queue_history(task, current_time) - _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time, False) + _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time) return task_end_events def free_slot(self, current_time): @@ -78,7 +78,7 @@ def maybe_start_task_any(self, current_time): task = queued_tasks.get() if (current_time >= task.log.task_placed_on_worker_queue_timestamp): did_exec_batch, task_end_events = self.maybe_start_task_for_type( - current_time, task.task_id, task.max_wait_time, False + current_time, task.task_id, task.max_wait_time ) if did_exec_batch: return task_end_events @@ -89,10 +89,9 @@ def maybe_start_task_any(self, current_time): return [] - def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_exec_batch) -> tuple[bool, list]: + def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]: """ - Execute a batch if 1) a batch of size max_batch_size can be created or 2) do_exec_batch is True - (do_exec_batch should be True when maybe is called by a wake up event) + Execute a batch if there are free slots available and at least 1 task queued. Returns did_exec_batch : bool, task_end_events : list[Event] """ @@ -114,9 +113,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time, do_ batch.append(task) # full batch or max wait time has passed - if len(task_list) > 0 and self.num_free_slots > 0 \ - and (do_exec_batch or len(batch) >= task_list[0].max_batch_size): - + if len(task_list) > 0 and self.num_free_slots > 0: batch_end_events, task_end_time = self.batch_execute( batch, current_time) From c813826d0cd1042404aa5d50d36ba5c0d640eebb Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 10:31:22 -0400 Subject: [PATCH 11/41] separate tasks for each task type; updated queue wait time estimates --- workers/taskworker.py | 127 +++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 51 deletions(-) diff --git a/workers/taskworker.py b/workers/taskworker.py index 43a3434..296bcb6 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -4,7 +4,7 @@ from core.network import * from core.events import * from schedulers.algo.nav_heft_algo import * -import time +import itertools class TaskWorker(Worker): @@ -13,7 +13,7 @@ def __init__(self, simulation, num_free_slots, worker_id): # {task_obj1:[(preq_task_id0,arrival_time0), (preq_taks_id0, arrival_time1), ...], task2:[( ...],} self.waiting_tasks_buffer = defaultdict(lambda: []) # keep track of the queue information at time: [ (time1,[task0,task1,]), (time2,[task1,...]),...] - self.queue_history = [] + self.queue_history = {} self.involved = False self.next_check_times = {} @@ -69,21 +69,39 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- - def maybe_start_task_any(self, current_time): - task_list = self.get_queue_history(current_time, info_staleness=0) + def get_sorted_task_ids(self, current_time) -> tuple[list[int], dict[int, list[Task]]]: + """ + Returns a list of all task_ids with at least 1 task queued on this + worker in order of when they are scheduled to execute (e.g. task queue + at index 0 is the next to be executed when a slot opens up on the worker) + in addition to a map of all task_ids to their task queues. + """ - queued_tasks = queue.Queue() - [queued_tasks.put(task) for task in task_list] - while (not queued_tasks.empty()) and self.num_free_slots > 0: - task = queued_tasks.get() - if (current_time >= task.log.task_placed_on_worker_queue_timestamp): - did_exec_batch, task_end_events = self.maybe_start_task_for_type( - current_time, task.task_id, task.max_wait_time - ) - if did_exec_batch: - return task_end_events - # keep checking queue until batch is executed or tasks run out + task_ids = self.queue_history.keys() + task_queues = { task_id: self.get_queue_history(current_time, task_id) for task_id in task_ids } + + task_ids_by_arrival = sorted( + filter(lambda task_id: len(task_queues[task_id]) > 0, task_ids), + key=lambda task_id: task_queues[task_id][0].log.task_placed_on_worker_queue_timestamp, + ) + + return task_ids_by_arrival, task_queues + + def maybe_start_task_any(self, current_time): + task_ids, task_queues = self.get_sorted_task_ids(current_time) + + if self.num_free_slots > 0: + for task_id in task_ids: + first_queued_task = task_queues[task_id][0] + if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp): + did_exec_batch, task_end_events = self.maybe_start_task_for_type( + current_time, task_id, first_queued_task.max_wait_time + ) + if did_exec_batch: + return task_end_events + # keep checking queue until batch is executed or tasks run out + # if no queued tasks, maybe is never called and no wake up events are # appended; in this case worker sleeps until a new task arrives return [] @@ -99,8 +117,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> did_exec_batch = False task_end_events = [] - task_list = [task for task in self.get_queue_history(current_time, info_staleness=0) - if task.task_id == task_type] + task_list = self.get_queue_history(current_time, task_type, info_staleness=0) queued_tasks = queue.Queue() [queued_tasks.put(task) for task in task_list] @@ -236,24 +253,25 @@ def receive_intermediate_result(self, current_time, prev_task, cur_task) -> list # ------------------------- queue history update helper functions --------------- def add_task_to_queue_history(self, task, current_time): - last_index = len(self.queue_history) - 1 - # 0. base case - if last_index == -1: - self.queue_history.append((current_time, [task])) + # 0. Base case (first entry) + if task.task_id not in self.queue_history: + self.queue_history[task.task_id] = [(current_time, [task])] return + # 1. Find the time_stamp place to add this queue information + last_index = len(self.queue_history[task.task_id]) - 1 while last_index >= 0: - if self.queue_history[last_index][0] == current_time: - if task not in self.queue_history[last_index][1]: - self.queue_history[last_index][1].append(task) + if self.queue_history[task.task_id][last_index][0] == current_time: + if task not in self.queue_history[task.task_id][last_index][1]: + self.queue_history[task.task_id][last_index][1].append(task) break - if self.queue_history[last_index][0] < current_time: + if self.queue_history[task.task_id][last_index][0] < current_time: # print("2") - if task not in self.queue_history[last_index][1]: - next_queue = self.queue_history[last_index][1].copy() + if task not in self.queue_history[task.task_id][last_index][1]: + next_queue = self.queue_history[task.task_id][last_index][1].copy() next_queue.append(task) last_index += 1 - self.queue_history.insert( + self.queue_history[task.task_id].insert( last_index, (current_time, next_queue) ) break @@ -261,47 +279,54 @@ def add_task_to_queue_history(self, task, current_time): last_index -= 1 # 2. added the task to all the subsequent timestamp tuples - while last_index < len(self.queue_history): - if task not in self.queue_history[last_index][1]: - self.queue_history[last_index][1].append(task) + while last_index < len(self.queue_history[task.task_id]): + if task not in self.queue_history[task.task_id][last_index][1]: + self.queue_history[task.task_id][last_index][1].append(task) last_index += 1 def rm_task_in_queue_history(self, task, current_time): - last_index = len(self.queue_history) - 1 # 0. base case: shouldn't happen - if last_index == -1: + if task.task_id not in self.queue_history: AssertionError("rm model cached location to an empty list") return + + last_index = len(self.queue_history[task.task_id]) - 1 + # 1. find the place to add this remove_event to the tuple list while last_index >= 0: - if self.queue_history[last_index][0] == current_time: - if task in self.queue_history[last_index][1]: - self.queue_history[last_index][1].remove(task) + if self.queue_history[task.task_id][last_index][0] == current_time: + if task in self.queue_history[task.task_id][last_index][1]: + self.queue_history[task.task_id][last_index][1].remove(task) break - if self.queue_history[last_index][0] < current_time: - if task in self.queue_history[last_index][1]: - next_tasks_in_queue = self.queue_history[last_index][1].copy() + if self.queue_history[task.task_id][last_index][0] < current_time: + if task in self.queue_history[task.task_id][last_index][1]: + next_tasks_in_queue = self.queue_history[task.task_id][last_index][1].copy() next_tasks_in_queue.remove(task) last_index = last_index + 1 - self.queue_history.insert( + self.queue_history[task.task_id].insert( last_index, (current_time, next_tasks_in_queue) ) break last_index -= 1 # go to prev time # 2. remove the task from all the subsequent tuple - while last_index < len(self.queue_history): - if task in self.queue_history[last_index]: - self.queue_history[last_index][1].remove(task) + while last_index < len(self.queue_history[task.task_id]): + if task in self.queue_history[task.task_id][last_index]: + self.queue_history[task.task_id][last_index][1].remove(task) last_index += 1 # do this for the remaining element after - def get_queue_history(self, current_time, info_staleness=0) -> list: - return self.get_history(self.queue_history, current_time, info_staleness) + def get_queue_history(self, current_time, task_id, info_staleness=0) -> list: + return self.get_history(self.queue_history[task_id], current_time, info_staleness) - def get_task_queue_waittime(self, current_time, info_staleness=0, requiring_worker_id=None): + def get_task_queue_waittime(self, current_time, task_id, info_staleness=0, requiring_worker_id=None): if requiring_worker_id != None and requiring_worker_id != self.worker_id: info_staleness = 0 - queueing_tasks = self.get_queue_history(current_time, info_staleness) - waittime = 0 - for task in queueing_tasks: - waittime += task.task_exec_duration - return waittime + + task_ids, task_queues = self.get_sorted_task_ids(current_time) + + wait_time = 0 + for queued_task_id in task_ids: + for task in task_queues[queued_task_id]: + wait_time += task.task_exec_duration + if queued_task_id == task_id: + return wait_time + return wait_time From 5de0c95e2ca1f31770961d02a8b48ccfdef51f18 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 10:32:05 -0400 Subject: [PATCH 12/41] per task type wait times --- schedulers/algo/nav_heft_algo.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py index eb54589..e87656e 100644 --- a/schedulers/algo/nav_heft_algo.py +++ b/schedulers/algo/nav_heft_algo.py @@ -83,16 +83,19 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co workers[worker.worker_id] = worker sorted_tasks = ranking_tasks(job) workers_to_select = [w.worker_id for w in worker_list] - workers_EAT = {} # worker_id -> earliest_available_time + workers_EAT = {} # worker_id -> (task_id -> earliest_available_time) workers_available_memory = {} # worker_id -> available_memory # 1. initialize the earliest available time and memory for each worker for worker_id in workers_to_select: - cur_worker_waittime = 0 - if consider_load: - cur_worker_waittime = workers[worker_id].get_task_queue_waittime(current_time, \ - info_staleness=LOAD_INFORMATION_STALENESS, \ - requiring_worker_id=initial_worker_id) - workers_EAT[worker_id] = current_time + cur_worker_waittime + workers_EAT[worker_id] = { + task_id: current_time + (workers[worker_id].get_task_queue_waittime( + current_time, + task_id, + info_staleness=LOAD_INFORMATION_STALENESS, + requiring_worker_id=initial_worker_id) if consider_load else 0) + for task_id in sorted_tasks + } + available_memory = GPU_MEMORY_SIZE if consider_cache: available_memory = workers[worker_id].used_GPUmemory(current_time, \ @@ -109,7 +112,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co fetching_model_size = 0 for cur_worker_id in workers_to_select: # 2.0 consider the current worker queue wait time to determine its earliest start time - cur_earliest_start_time = workers_EAT[cur_worker_id] + cur_earliest_start_time = workers_EAT[cur_worker_id][task_id] # 2.1 calculate the inputs arrival time inputs_arrival_time = 0 if cur_task.task_id == 0 and initial_worker_id is not None and cur_worker_id != initial_worker_id: @@ -142,7 +145,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co fetching_model_size = cur_fetching_model_size # 3. pick the worker with ealiest start time cur_task_finish_time = earliest_start_time + job.tasks[task_id].task_exec_duration - workers_EAT[selected_worker_id] = cur_task_finish_time + workers_EAT[selected_worker_id][task_id] = cur_task_finish_time allocated_tasks_info[task_id] = (selected_worker_id, cur_task_finish_time) if workers_available_memory[selected_worker_id] >= fetching_model_size: workers_available_memory[selected_worker_id] -= fetching_model_size @@ -155,6 +158,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_id, allocated_worker_id) -> int: # 1. check assigned worker wait_time to decide if need to adjust assigned worker cur_wait_time = workers[allocated_worker_id].get_task_queue_waittime(current_time, \ + task_id, \ info_staleness=LOAD_INFORMATION_STALENESS, \ requiring_worker_id=local_worker_id) cur_task = job.tasks[task_id] @@ -167,6 +171,7 @@ def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_i earliest_start_time = float('inf') for cur_worker in workers: wait_time = cur_worker.get_task_queue_waittime(current_time, \ + task_id, \ info_staleness=LOAD_INFORMATION_STALENESS, \ requiring_worker_id=local_worker_id) cur_earliest_start_time = current_time + wait_time From c3a8e436f40f8b0fe05ad3cd628a661673cb8397 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 13:59:17 -0400 Subject: [PATCH 13/41] task type bug fix --- core/events.py | 6 +-- core/job.py | 1 + core/task.py | 5 ++- workers/taskworker.py | 99 ++++++++++++++++++++++--------------------- 4 files changed, 58 insertions(+), 53 deletions(-) diff --git a/core/events.py b/core/events.py index 269b9e5..f3480d6 100644 --- a/core/events.py +++ b/core/events.py @@ -169,17 +169,17 @@ def to_string(self): class BatchEndEvent(Event): """ Event to signify that a BATCH has been performed by the WORKER. """ - def __init__(self, worker, job_ids=[], task_id=-1): + def __init__(self, worker, job_ids=[], task_type=(-1, -1)): self.worker = worker self.job_ids = job_ids # integers representing the job_ids - self.task_id = task_id # integer representing the task_id + self.task_type = task_type # (workflow_id, task_id) def run(self, current_time): return self.worker.free_slot(current_time) def to_string(self): jobs = ",".join([str(id) for id in self.job_ids]) - return f"[Batch End (Task {self.task_id}, Jobs {jobs}) at Worker {self.worker.worker_id}]" + return f"[Batch End (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" # for PER_JOB scheduler diff --git a/core/job.py b/core/job.py index 6d9822f..434c397 100644 --- a/core/job.py +++ b/core/job.py @@ -81,6 +81,7 @@ def job_generate_from_workflow(self): current_task = Task(self.id, # ID of the associated unique Job task_cfg["TASK_INDEX"], # taskID + (self.job_type_id, task_cfg["TASK_INDEX"]), # task type task_cfg["EXECUTION_TIME"], required_model_for_task, task_cfg["INPUT_SIZE"], diff --git a/core/task.py b/core/task.py index b490e52..61c478a 100644 --- a/core/task.py +++ b/core/task.py @@ -2,9 +2,12 @@ class Task(object): - def __init__(self, job_id, task_id, task_exec_duration, required_model, input_size, result_size, max_batch_size, max_wait_time, batch_sizes, batch_exec_time): + def __init__(self, job_id, task_id, task_type, task_exec_duration, + required_model, input_size, result_size, max_batch_size, + max_wait_time, batch_sizes, batch_exec_time): self.job_id = job_id # id of the job the task belongs to self.task_id = task_id # id of the task itself + self.task_type = task_type # (workflow_id, task_id) # the time it takes to execute the task self.task_exec_duration = task_exec_duration # required model_id to execute the task. None if it is a computation task that doesn't involve ML model diff --git a/workers/taskworker.py b/workers/taskworker.py index 296bcb6..b9211d2 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -4,7 +4,6 @@ from core.network import * from core.events import * from schedulers.algo.nav_heft_algo import * -import itertools class TaskWorker(Worker): @@ -22,12 +21,12 @@ def add_task(self, current_time, task): Add task into the local task queue """ - # print(f"[{current_time}] W{self.worker_id}: T{task.task_id} arrived") + # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) self.add_task_to_queue_history(task, current_time) - _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_id, task.max_wait_time) + _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time) return task_end_events def free_slot(self, current_time): @@ -69,34 +68,34 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- - def get_sorted_task_ids(self, current_time) -> tuple[list[int], dict[int, list[Task]]]: + def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(int, int)], dict[(int, int), list[Task]]]: """ - Returns a list of all task_ids with at least 1 task queued on this + Returns a list of all task_types with at least 1 task queued on this worker in order of when they are scheduled to execute (e.g. task queue at index 0 is the next to be executed when a slot opens up on the worker) - in addition to a map of all task_ids to their task queues. + in addition to a map of all task_types to their task queues. """ - task_ids = self.queue_history.keys() - task_queues = { task_id: self.get_queue_history(current_time, task_id) for task_id in task_ids } + task_types = self.queue_history.keys() + task_queues = { task_type: self.get_queue_history(current_time, task_type, info_staleness) for task_type in task_types } - task_ids_by_arrival = sorted( - filter(lambda task_id: len(task_queues[task_id]) > 0, task_ids), - key=lambda task_id: task_queues[task_id][0].log.task_placed_on_worker_queue_timestamp, + task_types_by_arrival = sorted( + filter(lambda task_type: len(task_queues[task_type]) > 0, task_types), + key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp, ) - return task_ids_by_arrival, task_queues + return task_types_by_arrival, task_queues def maybe_start_task_any(self, current_time): - task_ids, task_queues = self.get_sorted_task_ids(current_time) + task_types, task_queues = self.get_sorted_task_types(current_time) if self.num_free_slots > 0: - for task_id in task_ids: - first_queued_task = task_queues[task_id][0] + for task_type in task_types: + first_queued_task = task_queues[task_type][0] if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp): did_exec_batch, task_end_events = self.maybe_start_task_for_type( - current_time, task_id, first_queued_task.max_wait_time + current_time, task_type, first_queued_task.max_wait_time ) if did_exec_batch: return task_end_events @@ -131,6 +130,8 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> # full batch or max wait time has passed if len(task_list) > 0 and self.num_free_slots > 0: + # print(f"[{current_time}] W{self.worker_id}: Batch of {task_list} executing") + batch_end_events, task_end_time = self.batch_execute( batch, current_time) @@ -194,7 +195,7 @@ def batch_execute(self, tasks, current_time): task.log.task_execution_end_timestamp = task_end_time task_end_events.append(EventOrders(task_end_time, BatchEndEvent( - self, job_ids=job_ids, task_id=tasks[0].task_id + self, job_ids=job_ids, task_type=tasks[0].task_type ))) return task_end_events, task_end_time @@ -254,24 +255,24 @@ def receive_intermediate_result(self, current_time, prev_task, cur_task) -> list def add_task_to_queue_history(self, task, current_time): # 0. Base case (first entry) - if task.task_id not in self.queue_history: - self.queue_history[task.task_id] = [(current_time, [task])] + if task.task_type not in self.queue_history: + self.queue_history[task.task_type] = [(current_time, [task])] return # 1. Find the time_stamp place to add this queue information - last_index = len(self.queue_history[task.task_id]) - 1 + last_index = len(self.queue_history[task.task_type]) - 1 while last_index >= 0: - if self.queue_history[task.task_id][last_index][0] == current_time: - if task not in self.queue_history[task.task_id][last_index][1]: - self.queue_history[task.task_id][last_index][1].append(task) + if self.queue_history[task.task_type][last_index][0] == current_time: + if task not in self.queue_history[task.task_type][last_index][1]: + self.queue_history[task.task_type][last_index][1].append(task) break - if self.queue_history[task.task_id][last_index][0] < current_time: + if self.queue_history[task.task_type][last_index][0] < current_time: # print("2") - if task not in self.queue_history[task.task_id][last_index][1]: - next_queue = self.queue_history[task.task_id][last_index][1].copy() + if task not in self.queue_history[task.task_type][last_index][1]: + next_queue = self.queue_history[task.task_type][last_index][1].copy() next_queue.append(task) last_index += 1 - self.queue_history[task.task_id].insert( + self.queue_history[task.task_type].insert( last_index, (current_time, next_queue) ) break @@ -279,54 +280,54 @@ def add_task_to_queue_history(self, task, current_time): last_index -= 1 # 2. added the task to all the subsequent timestamp tuples - while last_index < len(self.queue_history[task.task_id]): - if task not in self.queue_history[task.task_id][last_index][1]: - self.queue_history[task.task_id][last_index][1].append(task) + while last_index < len(self.queue_history[task.task_type]): + if task not in self.queue_history[task.task_type][last_index][1]: + self.queue_history[task.task_type][last_index][1].append(task) last_index += 1 def rm_task_in_queue_history(self, task, current_time): # 0. base case: shouldn't happen - if task.task_id not in self.queue_history: + if task.task_type not in self.queue_history: AssertionError("rm model cached location to an empty list") return - last_index = len(self.queue_history[task.task_id]) - 1 + last_index = len(self.queue_history[task.task_type]) - 1 # 1. find the place to add this remove_event to the tuple list while last_index >= 0: - if self.queue_history[task.task_id][last_index][0] == current_time: - if task in self.queue_history[task.task_id][last_index][1]: - self.queue_history[task.task_id][last_index][1].remove(task) + if self.queue_history[task.task_type][last_index][0] == current_time: + if task in self.queue_history[task.task_type][last_index][1]: + self.queue_history[task.task_type][last_index][1].remove(task) break - if self.queue_history[task.task_id][last_index][0] < current_time: - if task in self.queue_history[task.task_id][last_index][1]: - next_tasks_in_queue = self.queue_history[task.task_id][last_index][1].copy() + if self.queue_history[task.task_type][last_index][0] < current_time: + if task in self.queue_history[task.task_type][last_index][1]: + next_tasks_in_queue = self.queue_history[task.task_type][last_index][1].copy() next_tasks_in_queue.remove(task) last_index = last_index + 1 - self.queue_history[task.task_id].insert( + self.queue_history[task.task_type].insert( last_index, (current_time, next_tasks_in_queue) ) break last_index -= 1 # go to prev time # 2. remove the task from all the subsequent tuple - while last_index < len(self.queue_history[task.task_id]): - if task in self.queue_history[task.task_id][last_index]: - self.queue_history[task.task_id][last_index][1].remove(task) + while last_index < len(self.queue_history[task.task_type]): + if task in self.queue_history[task.task_type][last_index]: + self.queue_history[task.task_type][last_index][1].remove(task) last_index += 1 # do this for the remaining element after - def get_queue_history(self, current_time, task_id, info_staleness=0) -> list: - return self.get_history(self.queue_history[task_id], current_time, info_staleness) + def get_queue_history(self, current_time, task_type, info_staleness=0) -> list: + return self.get_history(self.queue_history[task_type], current_time, info_staleness) - def get_task_queue_waittime(self, current_time, task_id, info_staleness=0, requiring_worker_id=None): + def get_task_queue_waittime(self, current_time, task_type, info_staleness=0, requiring_worker_id=None): if requiring_worker_id != None and requiring_worker_id != self.worker_id: info_staleness = 0 - task_ids, task_queues = self.get_sorted_task_ids(current_time) + task_types, task_queues = self.get_sorted_task_types(current_time, info_staleness=info_staleness) wait_time = 0 - for queued_task_id in task_ids: - for task in task_queues[queued_task_id]: + for queued_task_type in task_types: + for task in task_queues[queued_task_type]: wait_time += task.task_exec_duration - if queued_task_id == task_id: + if queued_task_type == task_type: return wait_time return wait_time From a1449ec495fbc079bf0cf17d560e55b4ffe80db0 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 14:59:24 -0400 Subject: [PATCH 14/41] task type fix --- schedulers/algo/nav_heft_algo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py index e87656e..1f2e2e3 100644 --- a/schedulers/algo/nav_heft_algo.py +++ b/schedulers/algo/nav_heft_algo.py @@ -83,14 +83,14 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co workers[worker.worker_id] = worker sorted_tasks = ranking_tasks(job) workers_to_select = [w.worker_id for w in worker_list] - workers_EAT = {} # worker_id -> (task_id -> earliest_available_time) + workers_EAT = {} # worker_id -> (task_type -> earliest_available_time) workers_available_memory = {} # worker_id -> available_memory # 1. initialize the earliest available time and memory for each worker for worker_id in workers_to_select: workers_EAT[worker_id] = { task_id: current_time + (workers[worker_id].get_task_queue_waittime( current_time, - task_id, + (job.job_type_id, task_id), info_staleness=LOAD_INFORMATION_STALENESS, requiring_worker_id=initial_worker_id) if consider_load else 0) for task_id in sorted_tasks @@ -158,7 +158,7 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_id, allocated_worker_id) -> int: # 1. check assigned worker wait_time to decide if need to adjust assigned worker cur_wait_time = workers[allocated_worker_id].get_task_queue_waittime(current_time, \ - task_id, \ + (job.job_type_id, task_id), \ info_staleness=LOAD_INFORMATION_STALENESS, \ requiring_worker_id=local_worker_id) cur_task = job.tasks[task_id] @@ -171,7 +171,7 @@ def nav_heft_task_adjustment(job, task_id, workers, current_time, local_worker_i earliest_start_time = float('inf') for cur_worker in workers: wait_time = cur_worker.get_task_queue_waittime(current_time, \ - task_id, \ + (job.job_type_id, task_id), \ info_staleness=LOAD_INFORMATION_STALENESS, \ requiring_worker_id=local_worker_id) cur_earliest_start_time = current_time + wait_time From 14b857d0568e7abf84bdc46587d038892416ac42 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 15:00:44 -0400 Subject: [PATCH 15/41] event logging fix --- schedulers/centralized/simulation_central.py | 3 ++- schedulers/decentralized/simulation_decentral.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/schedulers/centralized/simulation_central.py b/schedulers/centralized/simulation_central.py index 3754077..5f75adc 100644 --- a/schedulers/centralized/simulation_central.py +++ b/schedulers/centralized/simulation_central.py @@ -49,11 +49,12 @@ def run(self): self.event_queue.put(EventOrders( external_client_id * job_create_interval, \ JobCreationAtExternalClient(self, external_client_id))) + last_time = 0 while self.remaining_jobs > 0: cur_event = self.event_queue.get() - if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time): + if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time): self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] assert cur_event.current_time >= last_time diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py index 0ba0f22..cc80ac5 100644 --- a/schedulers/decentralized/simulation_decentral.py +++ b/schedulers/decentralized/simulation_decentral.py @@ -51,7 +51,7 @@ def run(self): while self.remaining_jobs > 0: cur_event = self.event_queue.get() - if type(cur_event) != WorkerWakeUpEvent or cur_event.will_run(cur_event.current_time): + if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time): self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] assert cur_event.current_time >= last_time From f8e40de526d7edca456bf308bebc689b6ae79067 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 20 May 2025 10:49:32 -0400 Subject: [PATCH 16/41] env vars setup script --- set_env.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 set_env.sh diff --git a/set_env.sh b/set_env.sh new file mode 100755 index 0000000..2437e4e --- /dev/null +++ b/set_env.sh @@ -0,0 +1,2 @@ +export SIMULATION_DIR=$(pwd) +export PYTHONPATH="${PYTHONPATH}:${SIMULATION_DIR}" From e497a17e35f1af54ee9846da67a011b39a75367f Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 18:51:36 -0400 Subject: [PATCH 17/41] dummy wf --- core/workflow.py | 105 +++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 53 deletions(-) diff --git a/core/workflow.py b/core/workflow.py index fc8f7a4..0a1bb50 100644 --- a/core/workflow.py +++ b/core/workflow.py @@ -16,9 +16,9 @@ "OUTPUT_SIZE": 2, # in kB "EXECUTION_TIME": 561, # avg time, in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 1000, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [561, 561, 561, 561, 561] + "BATCH_EXEC_TIME": [561, 673, 808, 969, 1346] }, {"MODEL_NAME": "marian", "MODEL_ID": 1, @@ -30,9 +30,9 @@ "OUTPUT_SIZE": 2, "EXECUTION_TIME": 441, # in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 1000, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [441, 441, 441, 441, 441] + "BATCH_EXEC_TIME": [441, 529, 687, 963, 1374] }, {"MODEL_NAME": "mt5", "MODEL_ID": 2, @@ -44,11 +44,11 @@ "OUTPUT_SIZE": 2, "EXECUTION_TIME": 778, # in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 1000, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [778, 778, 778, 778, 778] + "BATCH_EXEC_TIME": [778, 855, 941, 1035, 1139] }, - {"MODEL_NAME": "mt5", + {"MODEL_NAME": "mt5", "MODEL_ID": 2, "TASK_INDEX": 3, "PREV_TASK_INDEX": [0], @@ -58,11 +58,11 @@ "OUTPUT_SIZE": 2, "EXECUTION_TIME": 803, # in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 1000, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [803, 803, 803, 803, 803] + "BATCH_EXEC_TIME": [803, 833, 871, 939, 990] }, - {"MODEL_NAME": "", + {"MODEL_NAME": "", "MODEL_ID": -1, "TASK_INDEX": 4, "PREV_TASK_INDEX": [1,2,3], @@ -71,10 +71,10 @@ "INPUT_SIZE": 2, "OUTPUT_SIZE": 2, "EXECUTION_TIME": 1, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [1, 1, 1, 1, 1] + "MAX_BATCH_SIZE": 64, + "MAX_WAIT_TIME": 500, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64], + "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1] }, ] }, @@ -93,9 +93,9 @@ "OUTPUT_SIZE": 2, # in kB "EXECUTION_TIME": 560, # avg time, in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 1000, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [560, 560, 560, 560, 560] + "BATCH_EXEC_TIME": [560, 616, 677, 745, 820] }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, @@ -106,10 +106,10 @@ "INPUT_SIZE": 1, "OUTPUT_SIZE": 1, "EXECUTION_TIME": 27, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [27, 27, 27, 27, 27] + "MAX_BATCH_SIZE": 8, + "MAX_WAIT_TIME": 500, # ms + "BATCH_SIZES": [1, 2, 4, 8], + "BATCH_EXEC_TIME": [27, 48, 89, 170] } ] }, @@ -127,9 +127,9 @@ "OUTPUT_SIZE": 20, "EXECUTION_TIME": 283, # avg time, in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 500, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [283, 283, 283, 283, 283] + "BATCH_EXEC_TIME": [283, 339, 407, 489, 590] }, {"MODEL_NAME": "NLI", "MODEL_ID": 3, @@ -140,10 +140,10 @@ "INPUT_SIZE": 20, # 299×299, assuming 64 bits representation "OUTPUT_SIZE": 10, "EXECUTION_TIME": 26, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [26, 26, 26, 26, 26] + "MAX_BATCH_SIZE": 2, + "MAX_WAIT_TIME": 100, # ms + "BATCH_SIZES": [1, 2], + "BATCH_EXEC_TIME": [26, 48] }, {"MODEL_NAME": "txt2speech", "MODEL_ID": 5, @@ -154,12 +154,12 @@ "INPUT_SIZE": 20, "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 76, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [76, 76, 76, 76, 76] + "MAX_BATCH_SIZE": 32, + "MAX_WAIT_TIME": 100, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32], + "BATCH_EXEC_TIME": [76, 77, 82, 91, 106, 135] }, - {"MODEL_NAME": "aggregate", + {"MODEL_NAME": "aggregate", "MODEL_ID": -1, "TASK_INDEX": 3, "PREV_TASK_INDEX": [1,2], @@ -168,10 +168,10 @@ "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 0.2, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2] + "MAX_BATCH_SIZE": 64, + "MAX_WAIT_TIME": 100, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64], + "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3] } ] }, @@ -188,10 +188,10 @@ "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 0.6, # avg time, in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6] + "MAX_BATCH_SIZE": 32, + "MAX_WAIT_TIME": 100, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32], + "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6] }, {"MODEL_NAME": "DETR", "MODEL_ID": 8, @@ -202,10 +202,10 @@ "INPUT_SIZE": 3000, # 299×299, assuming 64 bits representation "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 178, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [178, 178, 178, 178, 178] + "MAX_BATCH_SIZE": 4, + "MAX_WAIT_TIME": 500, # ms + "BATCH_SIZES": [1, 2, 4], + "BATCH_EXEC_TIME": [178, 267, 400] }, {"MODEL_NAME": "Depth", "MODEL_ID": 9, @@ -217,11 +217,11 @@ "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 147, # in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms + "MAX_WAIT_TIME": 500, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [147, 147, 147, 147, 147] + "BATCH_EXEC_TIME": [147, 150, 155, 162, 172] }, - {"MODEL_NAME": "Aggregate", + {"MODEL_NAME": "Aggregate", "MODEL_ID": -1, "TASK_INDEX": 3, "PREV_TASK_INDEX": [1,2], @@ -230,12 +230,11 @@ "INPUT_SIZE": 3000, "OUTPUT_SIZE": 3000, "EXECUTION_TIME": 104, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 5000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [104, 104, 104, 104, 104] + "MAX_BATCH_SIZE": 8, + "MAX_WAIT_TIME": 500, # ms + "BATCH_SIZES": [1, 2, 4, 8], + "BATCH_EXEC_TIME": [104, 130, 165, 213] } ] - }, - -] + } +] \ No newline at end of file From 22dbc005d8a328f71c063ed664db37bb9af2c33c Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 10 Jun 2025 10:35:19 -0400 Subject: [PATCH 18/41] merge --- experiments/parse_results.py | 158 +++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 experiments/parse_results.py diff --git a/experiments/parse_results.py b/experiments/parse_results.py new file mode 100644 index 0000000..ee2ecfe --- /dev/null +++ b/experiments/parse_results.py @@ -0,0 +1,158 @@ +import sys +import os +import pandas as pd +import matplotlib.pyplot as plt + +from core.workflow import * +from functools import reduce + + +# TODO: verify units +def plot_response_time_vs_arrival_time(job_df, out_path): + plt.figure(figsize=(10, 6)) + + job_types = set(job_df["workflow_type"]) + job_names = { job_type: list(filter(lambda job: job["JOB_TYPE"]==job_type, WORKFLOW_LIST))[0]["JOB_NAME"] + for job_type in job_types } + + fst_job_create_time = job_df["job_create_time"][0] + for jt in job_types: + job_create_times = job_df[job_df["workflow_type"] == jt]["job_create_time"] - fst_job_create_time + job_response_times = job_df[job_df["workflow_type"] == jt]["response_time"] + + plt.scatter( + job_create_times, + job_response_times, + label=f"Workflow {jt}: {job_names[jt]}", + s=4 + ) + + plt.xlabel("Job arrival time (ms since start)") + plt.ylabel("Response time (ms)") + plt.title("Response Time vs. Arrival Time by Job Type") + + plt.legend() + plt.savefig(os.path.join(out_path, "response_vs_arrival.png")) + + +def plot_batch_size_vs_batch_start(event_df, out_path): + batch_start_events = event_df[event_df["event"].str.contains("Batch Start")] + + task_types = set(batch_start_events["event"].str.extract(r"Task \(([0-9]+, [0-9]+)\)")[0]) + model_names = { + task_type: list(filter( + lambda task: task["TASK_INDEX"]==int(task_type.split(", ")[1]), + list(filter(lambda job: job["JOB_TYPE"]==int(task_type.split(",")[0]), WORKFLOW_LIST))[0]["TASKS"] + ))[0]["MODEL_NAME"] for task_type in task_types } + + for task_type in task_types: + type_details = [int(item) for item in task_type.split(", ")] # [workflow_id, task_id] + + fig = plt.figure(figsize=(10, 6)) + + batch_start_events_for_type = batch_start_events[batch_start_events["event"].str.contains(f"Task \({task_type}\)")] + batch_sizes = batch_start_events_for_type["event"].str.extract(r"Jobs ([0-9|,]+)")[0].str.count(f'[0-9]+') + + plt.scatter( + batch_start_events_for_type["time"], + batch_sizes, + label=f"Workflow {type_details[0]}, Task ID {type_details[1]}: Model {model_names[task_type]}", + s=4 + ) + + plt.xlabel("Batch exec start time (ms since start)") + plt.ylabel("Batch size") + plt.title("Batch Size vs. Time by Model") + + plt.legend() + plt.savefig(os.path.join(out_path, f"wf_{type_details[0]}_task_{type_details[1]}_batch_size_vs_time.png")) + + +def plot_batch_size_bar_chart(event_df, out_path): + batch_start_events = event_df[event_df["event"].str.contains("Batch Start")] + + task_types = set(batch_start_events["event"].str.extract(r"Task \(([0-9]+, [0-9]+)\)")[0]) + task_details = { + task_type: list(filter( + lambda task: task["TASK_INDEX"]==int(task_type.split(", ")[1]), + list(filter(lambda job: job["JOB_TYPE"]==int(task_type.split(",")[0]), WORKFLOW_LIST))[0]["TASKS"] + ))[0] for task_type in task_types } + + for task_type in task_types: + type_details = [int(item) for item in task_type.split(", ")] # [workflow_id, task_id] + + fig = plt.figure(figsize=(8, 6)) + + batch_start_events_for_type = batch_start_events[batch_start_events["event"].str.contains(f"Task \({task_type}\)")] + batch_size_events = batch_start_events_for_type["event"].str.extract(r"Jobs ([0-9|,]+)")[0].str.count(f'[0-9]+') + batch_size_counts = list(map(lambda size: (batch_size_events == size).sum(), + task_details[task_type]["BATCH_SIZES"])) + + plt.bar( + range(len(task_details[task_type]["BATCH_SIZES"])), + batch_size_counts + ) + + plt.xticks(range(len(task_details[task_type]["BATCH_SIZES"])), task_details[task_type]["BATCH_SIZES"]) + plt.xlabel("Batch sizes") + plt.ylabel("Number of batches") + plt.title(f"Batch size distribution for {task_details[task_type]["MODEL_NAME"]} Model") + + plt.savefig(os.path.join(out_path, f"wf_{type_details[0]}_task_{type_details[1]}_batch_size_dist.png")) + + +def gen_per_task_stats(task_df, out_path): + job_types = set(task_df["workflow_type"]) + task_types_per_job = list(map( + lambda jt: set(task_df[task_df["workflow_type"] == jt]["task_id"]), + job_types + )) + + task_stat_types = ["arrival_at_worker_to_exec_start_time", "arrival_at_worker_to_enqueue_time", + "enqueue_to_exec_start_time", "model_fetching_time"] + task_stats = reduce( + lambda acc, t: acc + [f"mean_{t}", f"median_{t}", f"p99_{t}"], + task_stat_types, + [] + ) + task_stat_df = pd.DataFrame(columns=["job_type", "task_type"] + task_stats) + + for i, jt in enumerate(job_types): + for task_type in task_types_per_job[i]: + task_df_row_i = len(task_stat_df) + task_stat_df.loc[task_df_row_i] = {"job_type": jt, "task_type": task_type} + + task_set = task_df[(task_df["workflow_type"] == jt) + & (task_df["task_id"] == task_type)] + + task_stat_data = { + "arrival_at_worker_to_exec_start_time": task_set["task_start_exec_time"] - task_set["task_arrival_time"], + "arrival_at_worker_to_enqueue_time": task_set["dependency_wait_time"], + "enqueue_to_exec_start_time": task_set["time_spent_in_queue"], + "model_fetching_time": task_set["model_fetching_time"] + } + for stat in task_stat_types: + task_stat_df.loc[task_df_row_i, f"mean_{stat}"] = task_stat_data[stat].mean() + task_stat_df.loc[task_df_row_i, f"median_{stat}"] = task_stat_data[stat].median() + task_stat_df.loc[task_df_row_i, f"p99_{stat}"] = task_stat_data[stat].quantile(0.99) + + task_stat_df.to_csv(os.path.join(out_path, "per_task_avgs.csv")) + + +def gen_stats(job_df, event_df): + print(f"Mean response time: {job_df["response_time"].mean()}, Max: {job_df["response_time"].max()}") + # print(f"TPUT: {len(job_df) / event_df.loc[len(events_df)-1]["time"]}") + + +results_dir_path = sys.argv[1] # results/ +out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results" + +os.makedirs(out_path, exist_ok=True) + +job_df = pd.read_csv(os.path.join(results_dir_path, "job_breakdown.csv")) +# task_df = pd.read_csv(os.path.join(results_dir_path, "loadDelay_1_placementDelay_1.csv")) +events_df = pd.read_csv(os.path.join(results_dir_path, 'events_by_time.csv')) + +plot_batch_size_bar_chart(events_df, out_path) +plot_batch_size_vs_batch_start(events_df, out_path) +plot_response_time_vs_arrival_time(job_df, out_path) \ No newline at end of file From 6243924eceaa9b5dd456d664cec83f0dc006d1ae Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 10 Jun 2025 10:55:56 -0400 Subject: [PATCH 19/41] real workflow --- core/config.py | 10 +- core/workflow.py | 302 +++++++++++++++++------------------------------ 2 files changed, 115 insertions(+), 197 deletions(-) diff --git a/core/config.py b/core/config.py index d641630..c088dd1 100644 --- a/core/config.py +++ b/core/config.py @@ -1,14 +1,14 @@ """ -------- Worker Machines Parameters -------- """ -GPU_MEMORY_SIZE = 14000000 # in KB, 15BG for Tesla T4 +GPU_MEMORY_SIZE = 24000000 # in KB, 24GB for NVIDIA A30 -TOTAL_NUM_OF_WORKERS = 140 +TOTAL_NUM_OF_WORKERS = 50 """ -------- Workload Parameters -------- """ -TOTAL_NUM_OF_JOBS = 1000 +TOTAL_NUM_OF_JOBS = 10000 # The interval between two consecutive job creation events at each external client -DEFAULT_CREATION_INTERVAL_PERCLIENT = 100 # ms. +DEFAULT_CREATION_INTERVAL_PERCLIENT = 0.2 # ms. WORKLOAD_DISTRIBUTION = "POISON" # UNIFORM | POISON | GAMMA @@ -20,4 +20,4 @@ PLACEMENT_INFORMATION_STALENESS = 1 # in ms -RESCHEDULE_THREASHOLD = 1.5 \ No newline at end of file +RESCHEDULE_THREASHOLD = 1.5 diff --git a/core/workflow.py b/core/workflow.py index 0a1bb50..2f470a0 100644 --- a/core/workflow.py +++ b/core/workflow.py @@ -3,238 +3,156 @@ WORKFLOW_LIST = [ {"JOB_TYPE": 0, # ID of the type of workflow (dependency graph) - "JOB_NAME": "translation", + "JOB_NAME": "textvision", # the minimum amount of time necessary to execute the whole job - "BEST_EXEC_TIME": 1365, - "TASKS": [{"MODEL_NAME": "OPT", - "MODEL_ID": 0, + "BEST_EXEC_TIME": 51.7, + "TASKS": [{"MODEL_NAME": "", + "MODEL_ID": -1, "TASK_INDEX": 0, "PREV_TASK_INDEX": [], - "NEXT_TASK_INDEX": [1,2,3], - "MODEL_SIZE": 5720000, # in kB + "NEXT_TASK_INDEX": [1, 2], + "MODEL_SIZE": 0, # in KB "INPUT_SIZE": 1, - "OUTPUT_SIZE": 2, # in kB - "EXECUTION_TIME": 561, # avg time, in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 1000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [561, 673, 808, 969, 1346] + "OUTPUT_SIZE": 1, + "EXECUTION_TIME": 1, # in ms + "MAX_BATCH_SIZE": 128, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64, 128], + "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1, 1] }, - {"MODEL_NAME": "marian", - "MODEL_ID": 1, + {"MODEL_NAME": "text_encoder", + "MODEL_ID": 0, "TASK_INDEX": 1, "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 800000, # in kB - "INPUT_SIZE": 2, - "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 441, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 1000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [441, 529, 687, 963, 1374] + "NEXT_TASK_INDEX": [3], + "MODEL_SIZE": 5677000, # in kB + "INPUT_SIZE": 1, + "OUTPUT_SIZE": 2, # in kB + "EXECUTION_TIME": 10, # avg time, in ms + "MAX_BATCH_SIZE": 128, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 4, 8, 16, 32, 64, 128], + "BATCH_EXEC_TIME": [10, 10, 11, 12, 15, 20, 31] }, - {"MODEL_NAME": "mt5", - "MODEL_ID": 2, + {"MODEL_NAME": "vision_encoder", + "MODEL_ID": 1, "TASK_INDEX": 2, "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 2000000, # in KB - "INPUT_SIZE": 2, - "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 778, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 1000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [778, 855, 941, 1035, 1139] + "NEXT_TASK_INDEX": [3], + "MODEL_SIZE": 11655000, # in kB + "INPUT_SIZE": 10000, + "OUTPUT_SIZE": 100, + "EXECUTION_TIME": 31, # in ms + "MAX_BATCH_SIZE": 8, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 4, 8], + "BATCH_EXEC_TIME": [31, 98, 183] }, - {"MODEL_NAME": "mt5", + {"MODEL_NAME": "flmr", "MODEL_ID": 2, "TASK_INDEX": 3, - "PREV_TASK_INDEX": [0], + "PREV_TASK_INDEX": [1,2], "NEXT_TASK_INDEX": [4], - "MODEL_SIZE": 2000000, # in KB - "INPUT_SIZE": 2, - "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 803, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 1000, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [803, 833, 871, 939, 990] + "MODEL_SIZE": 854000, # in KB + "INPUT_SIZE": 102, + "OUTPUT_SIZE": 5, + "EXECUTION_TIME": 1.7, # in ms + "MAX_BATCH_SIZE": 32, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32], + "BATCH_EXEC_TIME": [1.7, 1.9, 1.9, 2, 2.6, 3.1] }, - {"MODEL_NAME": "", - "MODEL_ID": -1, + {"MODEL_NAME": "search", + "MODEL_ID": 3, "TASK_INDEX": 4, - "PREV_TASK_INDEX": [1,2,3], + "PREV_TASK_INDEX": [3], "NEXT_TASK_INDEX": [], - "MODEL_SIZE": 0, # in KB - "INPUT_SIZE": 2, + "MODEL_SIZE": 777000, # in KB + "INPUT_SIZE": 5, "OUTPUT_SIZE": 2, - "EXECUTION_TIME": 1, # in ms - "MAX_BATCH_SIZE": 64, - "MAX_WAIT_TIME": 500, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64], - "BATCH_EXEC_TIME": [1, 1, 1, 1, 1, 1, 1] - }, + "EXECUTION_TIME": 18, # in ms + "MAX_BATCH_SIZE": 16, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 4, 8, 16], + "BATCH_EXEC_TIME": [18, 64, 114, 209] + } ] }, {"JOB_TYPE": 1, - "JOB_NAME": "question_answer", + "JOB_NAME": "tts", # the minimum amount of time necessary to execute the whole job - "BEST_EXEC_TIME": 587, - "TASKS": [{"MODEL_NAME": "OPT", - "MODEL_ID": 0, + "BEST_EXEC_TIME": 308.4, + "TASKS": [{"MODEL_NAME": "audio_det", + "MODEL_ID": 4, "TASK_INDEX": 0, "PREV_TASK_INDEX": [], "NEXT_TASK_INDEX": [1], - "MODEL_SIZE": 5720000, # in kB - "INPUT_SIZE": 1, + "MODEL_SIZE": 10525000, # in kB + "INPUT_SIZE": 10000, "OUTPUT_SIZE": 2, # in kB - "EXECUTION_TIME": 560, # avg time, in ms + "EXECUTION_TIME": 66, # avg time, in ms "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 1000, # ms + "MAX_WAIT_TIME": 1, # ms "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [560, 616, 677, 745, 820] + "BATCH_EXEC_TIME": [66, 68, 70, 76, 127] }, - {"MODEL_NAME": "NLI", - "MODEL_ID": 3, - "TASK_INDEX": 1, - "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [], - "MODEL_SIZE": 2140000, # in kB - "INPUT_SIZE": 1, - "OUTPUT_SIZE": 1, - "EXECUTION_TIME": 27, # in ms - "MAX_BATCH_SIZE": 8, - "MAX_WAIT_TIME": 500, # ms - "BATCH_SIZES": [1, 2, 4, 8], - "BATCH_EXEC_TIME": [27, 48, 89, 170] - } - ] - }, - - {"JOB_TYPE": 2, # ID of the type of workflow (dependency graph) - "JOB_NAME": "img_to_sound", - "BEST_EXEC_TIME": 359.2, - "TASKS": [{"MODEL_NAME": "vit", - "MODEL_ID": 4, - "TASK_INDEX": 0, - "PREV_TASK_INDEX": [], - "NEXT_TASK_INDEX": [1,2], - "MODEL_SIZE": 1700000, # in kB - "INPUT_SIZE": 3000, # 224 x 224 x 3 shape, assuming 64 bits representation - "OUTPUT_SIZE": 20, - "EXECUTION_TIME": 283, # avg time, in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 500, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [283, 339, 407, 489, 590] - }, - {"MODEL_NAME": "NLI", - "MODEL_ID": 3, + {"MODEL_NAME": "text_encoder_2", + "MODEL_ID": 5, "TASK_INDEX": 1, "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 2140000, # in kB - "INPUT_SIZE": 20, # 299×299, assuming 64 bits representation - "OUTPUT_SIZE": 10, - "EXECUTION_TIME": 26, # in ms - "MAX_BATCH_SIZE": 2, - "MAX_WAIT_TIME": 100, # ms - "BATCH_SIZES": [1, 2], - "BATCH_EXEC_TIME": [26, 48] + "NEXT_TASK_INDEX": [2], + "MODEL_SIZE": 427000, # in kB + "INPUT_SIZE": 2, + "OUTPUT_SIZE": 4, + "EXECUTION_TIME": 17, # in ms + "MAX_BATCH_SIZE": 64, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64], + "BATCH_EXEC_TIME": [17, 18, 18, 19, 19, 20, 22] }, - {"MODEL_NAME": "txt2speech", - "MODEL_ID": 5, + {"MODEL_NAME": "faiss_search", + "MODEL_ID": 6, "TASK_INDEX": 2, - "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 2700000, # in kB - "INPUT_SIZE": 20, - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 76, # in ms - "MAX_BATCH_SIZE": 32, - "MAX_WAIT_TIME": 100, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16, 32], - "BATCH_EXEC_TIME": [76, 77, 82, 91, 106, 135] + "PREV_TASK_INDEX": [1], + "NEXT_TASK_INDEX": [3,4], + "MODEL_SIZE": 783000, # in kB + "INPUT_SIZE": 4, + "OUTPUT_SIZE": 2, + "EXECUTION_TIME": 0.4, # in ms + "MAX_BATCH_SIZE": 256, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64, 128, 256], + "BATCH_EXEC_TIME": [0.4, 0.4, 0.4, 0.5, 0.5, 0.6, 0.8, 1.1, 1.6] }, - {"MODEL_NAME": "aggregate", - "MODEL_ID": -1, + {"MODEL_NAME": "text_check", + "MODEL_ID": 7, "TASK_INDEX": 3, - "PREV_TASK_INDEX": [1,2], - "NEXT_TASK_INDEX": [], - "MODEL_SIZE": -1, # in kB - "INPUT_SIZE": 3000, - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 0.2, # in ms - "MAX_BATCH_SIZE": 64, - "MAX_WAIT_TIME": 100, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16, 32, 64], - "BATCH_EXEC_TIME": [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3] - } - ] - }, - - {"JOB_TYPE": 3, # ID of the type of workflow (dependency graph) - "JOB_NAME": "ImageObjDetect", - "BEST_EXEC_TIME": 282.6, - "TASKS": [{"MODEL_NAME": "entry", - "MODEL_ID": -1, - "TASK_INDEX": 0, - "PREV_TASK_INDEX": [], - "NEXT_TASK_INDEX": [1,2], - "MODEL_SIZE": -1, # in kB - "INPUT_SIZE": 3000, - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 0.6, # avg time, in ms - "MAX_BATCH_SIZE": 32, - "MAX_WAIT_TIME": 100, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16, 32], - "BATCH_EXEC_TIME": [0.6, 0.6, 0.6, 0.6, 0.6, 0.6] - }, - {"MODEL_NAME": "DETR", - "MODEL_ID": 8, - "TASK_INDEX": 1, - "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 1800000, # in kB - "INPUT_SIZE": 3000, # 299×299, assuming 64 bits representation - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 178, # in ms + "PREV_TASK_INDEX": [2], + "NEXT_TASK_INDEX": [4], + "MODEL_SIZE": 7383000, # in kB + "INPUT_SIZE": 2, + "OUTPUT_SIZE": 2, + "EXECUTION_TIME": 17, # in ms "MAX_BATCH_SIZE": 4, - "MAX_WAIT_TIME": 500, # ms + "MAX_WAIT_TIME": 1, # ms "BATCH_SIZES": [1, 2, 4], - "BATCH_EXEC_TIME": [178, 267, 400] - }, - {"MODEL_NAME": "Depth", - "MODEL_ID": 9, - "TASK_INDEX": 2, - "PREV_TASK_INDEX": [0], - "NEXT_TASK_INDEX": [3], - "MODEL_SIZE": 3900000, # in kB - "INPUT_SIZE": 3000, - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 147, # in ms - "MAX_BATCH_SIZE": 16, - "MAX_WAIT_TIME": 500, # ms - "BATCH_SIZES": [1, 2, 4, 8, 16], - "BATCH_EXEC_TIME": [147, 150, 155, 162, 172] + "BATCH_EXEC_TIME": [17, 25, 45] }, - {"MODEL_NAME": "Aggregate", - "MODEL_ID": -1, - "TASK_INDEX": 3, - "PREV_TASK_INDEX": [1,2], + {"MODEL_NAME": "text_to_speech", + "MODEL_ID": 8, + "TASK_INDEX": 4, + "PREV_TASK_INDEX": [2,3], "NEXT_TASK_INDEX": [], - "MODEL_SIZE": -1, # in kB - "INPUT_SIZE": 3000, - "OUTPUT_SIZE": 3000, - "EXECUTION_TIME": 104, # in ms - "MAX_BATCH_SIZE": 8, - "MAX_WAIT_TIME": 500, # ms - "BATCH_SIZES": [1, 2, 4, 8], - "BATCH_EXEC_TIME": [104, 130, 165, 213] + "MODEL_SIZE": 783000, # in kB + "INPUT_SIZE": 4, + "OUTPUT_SIZE": 10000, + "EXECUTION_TIME": 208, # in ms + "MAX_BATCH_SIZE": 1, + "MAX_WAIT_TIME": 1, # ms + "BATCH_SIZES": [1], + "BATCH_EXEC_TIME": [208] } ] } -] \ No newline at end of file +] From 05c3c8800639b38cdec86718d79c950e45ae1cc4 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 10 Jun 2025 16:58:39 -0400 Subject: [PATCH 20/41] lookahead model eviction --- workers/worker.py | 83 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git a/workers/worker.py b/workers/worker.py index 88cb292..a1f71e9 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -12,6 +12,7 @@ def __init__(self, simulation, num_free_slots, worker_id): self.worker_id = worker_id self.simulation = simulation self.num_free_slots = num_free_slots + self.current_batch = [] # track the currently executing batch (if any) self.GPU_memory_models = [] # Keep track of the list of models sitting in GPU memory at time: # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...] @@ -56,6 +57,26 @@ def used_GPUmemory(self, current_time, info_staleness=0, requiring_worker_id=Non return sum(m.model_size for m in models) # ---------- LOCAL MEMORY MANAGEMENT AND RETRIEVE ----------""" + def does_have_model(self, model, current_time: float, info_staleness=0) -> bool: + w_models = self.get_model_history(current_time, info_staleness) + return model in w_models + + def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool: + # if currently available memory >= min_required_memory + used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) + if GPU_MEMORY_SIZE - used_memory >= min_required_memory: + return True + + # if not executing any batches or executing a batch with no model, + # existing models can be evicted to make space + if (self.current_batch == [] or self.current_batch[0].model == None) and \ + min_required_memory <= GPU_MEMORY_SIZE: + return True + + # if evicting all except current batch's required model can make enough space + if GPU_MEMORY_SIZE - self.current_batch[0].model.model_size >= min_required_memory: + return True + def fetch_model(self, model, current_time): """ Return: model transfer time required to execute the Task @@ -67,9 +88,8 @@ def fetch_model(self, model, current_time): if model is None: return 0 # First check if the model is stored locally: either on GPU, or systemRAM(home node) - w_models = self.get_model_history(current_time, info_staleness=0) # case1: if it is in local GPU already - if model in w_models: + if self.does_have_model(model, current_time): return 0 fetch_time = 0 fetch_time = SameMachineCPUtoGPU_delay(model.model_size) @@ -78,6 +98,63 @@ def fetch_model(self, model, current_time): self.add_model_to_memory_history(model, current_time + fetch_time) eviction_time = self.evict_model_from_GPU(current_time + fetch_time) return fetch_time + eviction_time + + # Required to be overriden + def get_next_tasks(self, lookahead_count: int, current_time: float, info_staleness=0): + """ + Returns a list of up to lookahead_count tasks in order of when they are + expected to begin execution on the worker. + """ + return [] + + + def _evict_models_from_GPU(self, models_to_evict, current_time): + eviction_duration = 0 + required_current_model = self.current_batch[0].model if self.current_batch else None + for model in models_to_evict: + if model != required_current_model: + self.simulation.metadata_service.rm_model_cached_location( + model, self.worker_id, current_time) + self.rm_model_in_memory_history(model, current_time) + eviction_duration += SameMachineGPUtoCPU_delay(model.model_size) + return eviction_duration + + + def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int) -> float: + """ + Evicts models from GPU according to lookahead eviction policy until at least + min_required_memory space is available. Returns time taken to execute model + evictions. 0 if min_required_memory could not be created. + """ + if not self.can_fit(min_required_memory, current_time): + return 0 + + curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time) + + models_in_GPU = self.get_model_history(current_time, info_staleness=0) + required_current_model = self.current_batch[0].model if self.current_batch else None + next_models = set(map(lambda task: task.model, self.get_next_tasks(3))) + + models_to_evict = [] + + for i, model in enumerate(models_in_GPU): + # lowest priority models + if model not in next_models and model != required_current_model: + curr_memory -= model.model_size + models_to_evict.append(model) + if curr_memory >= min_required_memory: + return self._evict_models_from_GPU(models_to_evict) + + # next look at future models from latest -> earliest to be used + for model in next_models[::-1]: + if model in models_in_GPU and model != required_current_model: + curr_memory -= model.model_size + models_to_evict.append(model) + if curr_memory >= min_required_memory: + return self._evict_models_from_GPU(models_to_evict) + + return 0 + def evict_model_from_GPU(self, current_time): """ @@ -92,7 +169,7 @@ def evict_model_from_GPU(self, current_time): models_total_size += model.model_size eviction_index = 0 eviction_duration = 0 - while(models_total_size > GPU_MEMORY_SIZE): + while (models_total_size > GPU_MEMORY_SIZE): rm_model = models_in_GPU[eviction_index] self.simulation.metadata_service.rm_model_cached_location( rm_model, self.worker_id, current_time) From f9b8408cbda1936112d4cb37edf5b04008867d20 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 11 Jun 2025 15:50:13 -0400 Subject: [PATCH 21/41] eviction policy fix --- workers/worker.py | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/workers/worker.py b/workers/worker.py index a1f71e9..92ed859 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -17,7 +17,7 @@ def __init__(self, simulation, num_free_slots, worker_id): # Keep track of the list of models sitting in GPU memory at time: # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...] self.GPU_memory_models_history = [] - + self.models_in_use = [] def __hash__(self): return hash(self.worker_id) @@ -61,6 +61,10 @@ def does_have_model(self, model, current_time: float, info_staleness=0) -> bool: w_models = self.get_model_history(current_time, info_staleness) return model in w_models + def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int: + w_models = self.get_model_history(current_time, info_staleness) + return w_models.count(model) + def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool: # if currently available memory >= min_required_memory used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) @@ -99,20 +103,18 @@ def fetch_model(self, model, current_time): eviction_time = self.evict_model_from_GPU(current_time + fetch_time) return fetch_time + eviction_time - # Required to be overriden - def get_next_tasks(self, lookahead_count: int, current_time: float, info_staleness=0): + # NOTE: REQUIRED OVERRIDE + def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0): """ - Returns a list of up to lookahead_count tasks in order of when they are - expected to begin execution on the worker. + Returns a list of up to lookahead_count models in order of when they are + expected to be executed. """ return [] - def _evict_models_from_GPU(self, models_to_evict, current_time): eviction_duration = 0 - required_current_model = self.current_batch[0].model if self.current_batch else None for model in models_to_evict: - if model != required_current_model: + if model not in self.models_in_use: self.simulation.metadata_service.rm_model_cached_location( model, self.worker_id, current_time) self.rm_model_in_memory_history(model, current_time) @@ -125,6 +127,7 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: Evicts models from GPU according to lookahead eviction policy until at least min_required_memory space is available. Returns time taken to execute model evictions. 0 if min_required_memory could not be created. + Assumes batches run in first task arrival order. """ if not self.can_fit(min_required_memory, current_time): return 0 @@ -132,26 +135,20 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time) models_in_GPU = self.get_model_history(current_time, info_staleness=0) - required_current_model = self.current_batch[0].model if self.current_batch else None - next_models = set(map(lambda task: task.model, self.get_next_tasks(3))) + next_models = self.get_next_models(3, current_time) + models_in_GPU_sorted = sorted( + models_in_GPU, + key=lambda m: next_models.index(m) if m in next_models else len(next_models), + reverse=True + ) models_to_evict = [] - - for i, model in enumerate(models_in_GPU): - # lowest priority models - if model not in next_models and model != required_current_model: - curr_memory -= model.model_size - models_to_evict.append(model) - if curr_memory >= min_required_memory: - return self._evict_models_from_GPU(models_to_evict) - - # next look at future models from latest -> earliest to be used - for model in next_models[::-1]: - if model in models_in_GPU and model != required_current_model: + for model in models_in_GPU_sorted: + if model not in self.models_in_use: curr_memory -= model.model_size models_to_evict.append(model) if curr_memory >= min_required_memory: - return self._evict_models_from_GPU(models_to_evict) + return self._evict_models_from_GPU(models_to_evict, current_time) return 0 From ae44ac7d5c4b1b2022b2c5a98823095cd557d541 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 11 Jun 2025 15:50:45 -0400 Subject: [PATCH 22/41] concurrent batch execution according to available GPU memory --- workers/taskworker.py | 173 +++++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 78 deletions(-) diff --git a/workers/taskworker.py b/workers/taskworker.py index b9211d2..02d6df9 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -20,18 +20,31 @@ def add_task(self, current_time, task): """ Add task into the local task queue """ - - # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") - - # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) - self.add_task_to_queue_history(task, current_time) - _, task_end_events = self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time) - return task_end_events - - def free_slot(self, current_time): + self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker + return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time) + + def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0): + if lookahead_count <= 0: + return [] + + next_models = [] + task_types_by_arrival, task_queues = self.get_sorted_task_types(current_time) + for task_type in task_types_by_arrival: + next_model = task_queues[task_type][0].model + if next_model != None and next_model not in next_models: + next_models.append(next_model) + if len(next_models) == lookahead_count: + return next_models + + return next_models + + def free_slot(self, current_time, model): """ Frees a slot on the worker and attempts to launch another task in that slot. """ - self.num_free_slots += 1 + # self.num_free_slots += 1 + if model != None: + self.models_in_use.remove(model) + get_task_events = self.maybe_start_task_any(current_time) return get_task_events @@ -68,7 +81,29 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- - def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(int, int)], dict[(int, int), list[Task]]]: + _CAN_RUN_NOW = 0 + _CAN_RUN_ON_LOAD = 1 + _CANNOT_RUN = 2 + + def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> int: + """ + Returns _CAN_RUN_NOW if model None, or model is on GPU and not currently in use. + Returns _CAN_RUN_ON_LOAD if model can be loaded onto the GPU (either by evicting + existing models not in use or simply using available memory). + Returns _CANNOT_RUN otherwise. + """ + if model == None: # doesn't use GPU + return self._CAN_RUN_NOW + # has >= 1 copies of model in memory that are not currently in use + elif self.does_have_model(model, current_time, info_staleness) and \ + self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: + return self._CAN_RUN_NOW + elif self.can_fit(model.model_size, current_time, info_staleness): + return self._CAN_RUN_ON_LOAD + else: + return self._CANNOT_RUN + + def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tuple[int, int]], dict[tuple[int, int], list[Task]]]: """ Returns a list of all task_types with at least 1 task queued on this worker in order of when they are scheduled to execute (e.g. task queue @@ -85,87 +120,69 @@ def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[(i ) return task_types_by_arrival, task_queues - - - def maybe_start_task_any(self, current_time): - task_types, task_queues = self.get_sorted_task_types(current_time) - - if self.num_free_slots > 0: - for task_type in task_types: - first_queued_task = task_queues[task_type][0] - if (current_time >= first_queued_task.log.task_placed_on_worker_queue_timestamp): - did_exec_batch, task_end_events = self.maybe_start_task_for_type( - current_time, task_type, first_queued_task.max_wait_time - ) - if did_exec_batch: - return task_end_events - # keep checking queue until batch is executed or tasks run out - - # if no queued tasks, maybe is never called and no wake up events are - # appended; in this case worker sleeps until a new task arrives - return [] + def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> list[EventOrders]: + # only wake up if existing tasks to avoid congestion since + # empty queue will wake up on next task enqueue + if len(task_queue) == 0: + return [] - def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]: - """ - Execute a batch if there are free slots available and at least 1 task queued. - - Returns did_exec_batch : bool, task_end_events : list[Event] - """ + batch_end_events = [] latest_time = current_time - did_exec_batch = False - - task_end_events = [] - task_list = self.get_queue_history(current_time, task_type, info_staleness=0) - queued_tasks = queue.Queue() - [queued_tasks.put(task) for task in task_list] - - # form largest batch < max_batch_size possible - batch = [] - while (not queued_tasks.empty()) and self.num_free_slots > 0 and len(batch) < task_list[0].max_batch_size: - task = queued_tasks.get() - if (current_time >= task.log.task_placed_on_worker_queue_timestamp): - batch.append(task) + can_run = self.can_run_task(current_time, task_queue[0].model) + if can_run == self._CAN_RUN_ON_LOAD: + current_time += self.evict_models_from_GPU_until( + current_time, task_queue[0].model.model_size) - # full batch or max wait time has passed - if len(task_list) > 0 and self.num_free_slots > 0: - # print(f"[{current_time}] W{self.worker_id}: Batch of {task_list} executing") - - batch_end_events, task_end_time = self.batch_execute( - batch, current_time) + if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD: + queued_tasks = queue.Queue() + [queued_tasks.put(task) for task in task_queue] + + # form largest batch < max_batch_size possible + batch = [] + while (not queued_tasks.empty()) and len(batch) < task_queue[0].max_batch_size: + task = queued_tasks.get() + if (current_time >= task.log.task_placed_on_worker_queue_timestamp): + batch.append(task) - # rm all tasks in batch - for task in batch: - self.rm_task_in_queue_history(task, current_time) - - latest_time = task_end_time - - did_exec_batch = True - task_end_events += batch_end_events + if len(batch) > 0: + batch_end_events, task_end_time = self.batch_execute(batch, current_time) + for task in batch: # rm all tasks in batch + self.rm_task_in_queue_history(task, current_time) + latest_time = task_end_time # track next wake up time so old wake ups can be skipped - next_check_time = latest_time + task_wait_time - self.next_check_times[task_type] = next_check_time + next_check_time = latest_time + task_queue[0].max_wait_time + self.next_check_times[task_queue[0].task_type] = next_check_time # if idle, check again in wait time # NOTE: for some reason, appending to task_end_events does not always # lead to event being enqueued; thus we enqueue directly to sim queue here - self.simulation.event_queue.put( - EventOrders( - next_check_time, - WorkerWakeUpEvent(self, task_type, task_wait_time) - ) - ) - - return did_exec_batch, task_end_events + self.simulation.event_queue.put(EventOrders( + next_check_time, + WorkerWakeUpEvent(self, + task_queue[0].task_type, + task_queue[0].max_wait_time))) + + return batch_end_events + + def maybe_start_task_any(self, current_time): + all_end_events = [] + task_types, task_queues = self.get_sorted_task_types(current_time) + for task_type in task_types: + all_end_events += self._maybe_start_batch(task_queues[task_type], current_time) + return all_end_events + + def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]: + task_queue = self.get_queue_history(current_time, task_type, info_staleness=0) + return self._maybe_start_batch(task_queue, current_time) - # modify to handle a batch of tasks: - # need to model batch execution duration - # transfer to next step should handle a list of tasks def batch_execute(self, tasks, current_time): + self.current_batch = tasks + self.involved = True - self.num_free_slots -= 1 + self.models_in_use.append(tasks[0].model) model_fetch_time = self.fetch_model(tasks[0].model, current_time) batch_index = 0 @@ -195,7 +212,7 @@ def batch_execute(self, tasks, current_time): task.log.task_execution_end_timestamp = task_end_time task_end_events.append(EventOrders(task_end_time, BatchEndEvent( - self, job_ids=job_ids, task_type=tasks[0].task_type + self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type ))) return task_end_events, task_end_time From c85596f008e955e4c0829422ab0fb066e0e12e43 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 11 Jun 2025 15:51:41 -0400 Subject: [PATCH 23/41] add model data to track models in use --- core/events.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/core/events.py b/core/events.py index f3480d6..a6469f8 100644 --- a/core/events.py +++ b/core/events.py @@ -160,7 +160,7 @@ def __init__(self, worker, job_id=-1, task_id=-1): self.task_id = task_id # integer representing the task_id def run(self, current_time): - return self.worker.free_slot(current_time) + return self.worker.free_slot(current_time, self) def to_string(self): return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id) @@ -169,13 +169,14 @@ def to_string(self): class BatchEndEvent(Event): """ Event to signify that a BATCH has been performed by the WORKER. """ - def __init__(self, worker, job_ids=[], task_type=(-1, -1)): + def __init__(self, worker, model, job_ids=[], task_type=(-1, -1)): self.worker = worker + self.model = model self.job_ids = job_ids # integers representing the job_ids self.task_type = task_type # (workflow_id, task_id) def run(self, current_time): - return self.worker.free_slot(current_time) + return self.worker.free_slot(current_time, self.model) def to_string(self): jobs = ",".join([str(id) for id in self.job_ids]) @@ -229,10 +230,8 @@ def __init__(self, worker, task_id, task_max_wait_time): def run(self, current_time): if self.will_run(current_time): - _, task_end_events = self.worker.maybe_start_task_for_type( - current_time, self.task_id, self.task_max_wait_time - ) - return task_end_events + return self.worker.maybe_start_task_for_type( + current_time, self.task_id, self.task_max_wait_time) return [] def to_string(self): From 3d5d72cb15f94f4b2dccbf9dc383801a866e729b Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 10:57:09 -0400 Subject: [PATCH 24/41] can fit helper fix --- workers/taskworker.py | 3 ++- workers/worker.py | 11 ++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/workers/taskworker.py b/workers/taskworker.py index 02d6df9..ce24381 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -182,7 +182,8 @@ def batch_execute(self, tasks, current_time): self.current_batch = tasks self.involved = True - self.models_in_use.append(tasks[0].model) + if tasks[0].model != None: + self.models_in_use.append(tasks[0].model) model_fetch_time = self.fetch_model(tasks[0].model, current_time) batch_index = 0 diff --git a/workers/worker.py b/workers/worker.py index 92ed859..a3fcbe8 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -17,7 +17,7 @@ def __init__(self, simulation, num_free_slots, worker_id): # Keep track of the list of models sitting in GPU memory at time: # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...] self.GPU_memory_models_history = [] - self.models_in_use = [] + self.models_in_use = [] # models in use by a currently executing batch def __hash__(self): return hash(self.worker_id) @@ -71,14 +71,11 @@ def can_fit(self, min_required_memory: int, current_time: float, info_staleness= if GPU_MEMORY_SIZE - used_memory >= min_required_memory: return True - # if not executing any batches or executing a batch with no model, - # existing models can be evicted to make space - if (self.current_batch == [] or self.current_batch[0].model == None) and \ - min_required_memory <= GPU_MEMORY_SIZE: + if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE: return True - # if evicting all except current batch's required model can make enough space - if GPU_MEMORY_SIZE - self.current_batch[0].model.model_size >= min_required_memory: + # if evicting all except current required models can make enough space + if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) >= min_required_memory: return True def fetch_model(self, model, current_time): From 00dc474f7c7c13d0e1dd587189f08bec4df9d4eb Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 11:16:59 -0400 Subject: [PATCH 25/41] merge --- core/simulation.py | 1 + experiments/run_experiments.py | 10 +++++++++- workers/taskworker.py | 19 +++++++++++++++++-- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/core/simulation.py b/core/simulation.py index 22b3f1b..73df7c5 100644 --- a/core/simulation.py +++ b/core/simulation.py @@ -44,6 +44,7 @@ def __init__( self.result_to_export = pd.DataFrame() self.tasks_logging_times = pd.DataFrame() self.event_log = pd.DataFrame(columns=["time", "event"]) + self.batch_exec_log = pd.DataFrame(columns=["time", "worker_id", "workflow_id", "task_id", "batch_size", "model_exec_time", "batch_exec_time", "job_ids"]) print("---- SIMULATION : " + self.simulation_name + "----") self.produce_breakdown = produce_breakdown diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index a14154e..a14bf70 100644 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -12,7 +12,7 @@ # experiment_schedulers options: centralheft | decentralheft | hashtask experiment_schedulers = [] -plotting_job_type_list = [0, 1, 2, 3] +plotting_job_type_list = [0] # plotting_job_type_list = [2,3] np.random.seed(42) @@ -95,3 +95,11 @@ tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str( LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv") + + sim.batch_exec_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "batch_log.csv") + + worker_model_histories = pd.concat(list(map(lambda w: w.model_history_log, sim.workers)), + keys=list(map(lambda w: w.worker_id, sim.workers)), + names=['worker_id']).reset_index(level='worker_id') + worker_model_histories = worker_model_histories.sort_values(by="start_time") + worker_model_histories.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "model_history_log.csv") diff --git a/workers/taskworker.py b/workers/taskworker.py index ce24381..3ebeb39 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -20,6 +20,10 @@ def add_task(self, current_time, task): """ Add task into the local task queue """ + + # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") + + # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time) @@ -95,8 +99,7 @@ def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> i if model == None: # doesn't use GPU return self._CAN_RUN_NOW # has >= 1 copies of model in memory that are not currently in use - elif self.does_have_model(model, current_time, info_staleness) and \ - self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: + elif self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: return self._CAN_RUN_NOW elif self.can_fit(model.model_size, current_time, info_staleness): return self._CAN_RUN_ON_LOAD @@ -184,6 +187,7 @@ def batch_execute(self, tasks, current_time): self.involved = True if tasks[0].model != None: self.models_in_use.append(tasks[0].model) + model_fetch_time = self.fetch_model(tasks[0].model, current_time) batch_index = 0 @@ -212,6 +216,17 @@ def batch_execute(self, tasks, current_time): task.log.task_execution_start_timestamp = current_time + model_fetch_time task.log.task_execution_end_timestamp = task_end_time + self.simulation.batch_exec_log.loc[len(self.simulation.batch_exec_log)] = { + "time": current_time, + "worker_id": self.worker_id, + "workflow_id": tasks[0].task_type[0], + "task_id": tasks[0].task_id, + "batch_size": len(tasks), + "model_exec_time": tasks[0].batch_exec_time[batch_index], + "batch_exec_time": model_fetch_time + tasks[0].batch_exec_time[batch_index], + "job_ids": job_ids + } + task_end_events.append(EventOrders(task_end_time, BatchEndEvent( self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type ))) From 2c916e309bba23559b94f417b0e2b9ba9ebdcd17 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 13:07:40 -0400 Subject: [PATCH 26/41] model logging & eviction fix --- workers/worker.py | 71 +++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/workers/worker.py b/workers/worker.py index a3fcbe8..7260498 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -2,7 +2,8 @@ from core.config import * from core.network import * from core.config import * -import sys + +import pandas as pd class Worker(object): @@ -19,6 +20,9 @@ def __init__(self, simulation, num_free_slots, worker_id): self.GPU_memory_models_history = [] self.models_in_use = [] # models in use by a currently executing batch + self.model_history_log = pd.DataFrame(columns=["start_time", "end_time", + "model_id", "placed_or_evicted"]) + def __hash__(self): return hash(self.worker_id) @@ -66,17 +70,33 @@ def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int: return w_models.count(model) def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool: + # models currently being fetched = models in use - models loaded on GPU + loaded_models = self.get_model_history(current_time, info_staleness) + fetching_models = [] + for model in self.models_in_use: + if model in loaded_models: + loaded_models.remove(model) + else: + fetching_models.append(model) + + # loaded models + models currently being fetched + used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) + \ + sum([model.model_size for model in fetching_models]) + # if currently available memory >= min_required_memory - used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) if GPU_MEMORY_SIZE - used_memory >= min_required_memory: return True + # if no batches/current batches do not use GPU if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE: return True - # if evicting all except current required models can make enough space - if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) >= min_required_memory: + # if evicting all except current required models & models being fetched can make enough space + if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) - \ + sum(map(lambda m: m.model_size, fetching_models)) >= min_required_memory: return True + + return False def fetch_model(self, model, current_time): """ @@ -92,13 +112,21 @@ def fetch_model(self, model, current_time): # case1: if it is in local GPU already if self.does_have_model(model, current_time): return 0 + fetch_time = 0 fetch_time = SameMachineCPUtoGPU_delay(model.model_size) + + self.model_history_log.loc[len(self.model_history_log)] = { + "start_time": current_time, + "end_time": current_time + fetch_time, + "model_id": model.model_id, + "placed_or_evicted": "placed" + } + self.simulation.metadata_service.add_model_cached_location( model, self.worker_id, current_time + fetch_time) self.add_model_to_memory_history(model, current_time + fetch_time) - eviction_time = self.evict_model_from_GPU(current_time + fetch_time) - return fetch_time + eviction_time + return fetch_time # NOTE: REQUIRED OVERRIDE def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0): @@ -116,6 +144,13 @@ def _evict_models_from_GPU(self, models_to_evict, current_time): model, self.worker_id, current_time) self.rm_model_in_memory_history(model, current_time) eviction_duration += SameMachineGPUtoCPU_delay(model.model_size) + + self.model_history_log.loc[len(self.model_history_log)] = { + "start_time": current_time, + "end_time": current_time + eviction_duration, + "model_id": model.model_id, + "placed_or_evicted": "evicted" + } return eviction_duration @@ -148,30 +183,6 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: return self._evict_models_from_GPU(models_to_evict, current_time) return 0 - - - def evict_model_from_GPU(self, current_time): - """ - Do nothing if current cached models didn't exceed the GPU memory - remove this information to 2 histories: - 1. model_history on worker - 2. cache_history on metadata_service - """ - models_in_GPU = self.get_model_history(current_time, info_staleness=0) - models_total_size = 0 - for model in models_in_GPU: - models_total_size += model.model_size - eviction_index = 0 - eviction_duration = 0 - while (models_total_size > GPU_MEMORY_SIZE): - rm_model = models_in_GPU[eviction_index] - self.simulation.metadata_service.rm_model_cached_location( - rm_model, self.worker_id, current_time) - self.rm_model_in_memory_history(rm_model, current_time) - models_total_size -= rm_model.model_size - eviction_index += 1 - eviction_duration += SameMachineGPUtoCPU_delay(rm_model.model_size) - return eviction_duration # ------------------------- cached model history update helper functions --------------- def add_model_to_memory_history(self, model, current_time): From b04b800945dae4aaa6e26b6a48f110000354aafb Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 15:36:38 -0400 Subject: [PATCH 27/41] merge: job logging --- core/events.py | 19 +++++++++++++++++++ workers/taskworker.py | 7 +++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/core/events.py b/core/events.py index a6469f8..4ededcc 100644 --- a/core/events.py +++ b/core/events.py @@ -166,6 +166,25 @@ def to_string(self): return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id) +class BatchStartEvent(Event): + """ + Event to signify that a BATCH has started executing in WORKER. + Only for logging purposes. + """ + + def __init__(self, worker, job_ids=[], task_type=(-1, -1)): + self.worker = worker + self.job_ids = job_ids # list[int] with the job_ids in the batch + self.task_type = task_type # (workflow_id, task_id) identifying the batch task_type + + def run(self, current_time): + return [] + + def to_string(self): + jobs = ",".join([str(id) for id in self.job_ids]) + return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" + + class BatchEndEvent(Event): """ Event to signify that a BATCH has been performed by the WORKER. """ diff --git a/workers/taskworker.py b/workers/taskworker.py index 3ebeb39..96db516 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -21,7 +21,7 @@ def add_task(self, current_time, task): Add task into the local task queue """ - # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") + print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) @@ -182,7 +182,7 @@ def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> return self._maybe_start_batch(task_queue, current_time) def batch_execute(self, tasks, current_time): - self.current_batch = tasks + assert(len(tasks) > 0) # cannot launch empty batch self.involved = True if tasks[0].model != None: @@ -227,6 +227,9 @@ def batch_execute(self, tasks, current_time): "job_ids": job_ids } + task_end_events.append(EventOrders(current_time + model_fetch_time, BatchStartEvent( + self, job_ids=job_ids, task_type=tasks[0].task_type + ))) task_end_events.append(EventOrders(task_end_time, BatchEndEvent( self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type ))) From 086256e6031f446fa2705ecb1b12999ebe2db95d Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Fri, 6 Jun 2025 15:36:38 -0400 Subject: [PATCH 28/41] merge fix --- core/events.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/events.py b/core/events.py index 4ededcc..3bb6202 100644 --- a/core/events.py +++ b/core/events.py @@ -185,6 +185,22 @@ def to_string(self): return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" +class BatchEndEvent(Event): + """ Event to signify that a BATCH has been performed by the WORKER. """ + + def __init__(self, worker, job_ids=[], task_type=(-1, -1)): + self.worker = worker + self.job_ids = job_ids # list[int] with the job_ids in the batch + self.task_type = task_type # (workflow_id, task_id) identifying the batch task_type + + def run(self, current_time): + return [] + + def to_string(self): + jobs = ",".join([str(id) for id in self.job_ids]) + return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" + + class BatchEndEvent(Event): """ Event to signify that a BATCH has been performed by the WORKER. """ From 11a9557fe5d318a0e575186086c80a8c7daac7b4 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 20 May 2025 11:13:34 -0400 Subject: [PATCH 29/41] merge: job logging --- core/simulation.py | 13 +++++++------ experiments/parse_results.py | 2 +- experiments/run_experiments.py | 7 +++++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/core/simulation.py b/core/simulation.py index 73df7c5..d9b538d 100644 --- a/core/simulation.py +++ b/core/simulation.py @@ -106,8 +106,8 @@ def run_finish(self, last_time, by_job_type=False): def produce_time_breakdown_results(self, completed_jobs): dataframe = pd.DataFrame(columns=["job_id", "load_info_staleness", "placement_info_staleness", "req_inter_arrival_delay", - "workflow_type", "scheduler_type", "slowdown", "response_time"]) - dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "time_to_buffer", "dependency_wait_time", + "workflow_type", "job_create_time", "scheduler_type", "slowdown", "response_time"]) + dataframe_tasks_log = pd.DataFrame(columns=["workflow_type", "task_id", "task_arrival_time", "task_start_exec_time", "time_to_buffer", "dependency_wait_time", "time_spent_in_queue", "model_fetching_time", "execution_time"]) for index, completed_job in enumerate(completed_jobs): @@ -122,7 +122,7 @@ def produce_time_breakdown_results(self, completed_jobs): if "JOB_CREATION_INTERVAL" in WORKFLOW_LIST[completed_job.job_type_id]: job_creation_interval = WORKFLOW_LIST[completed_job.job_type_id]["JOB_CREATION_INTERVAL"] dataframe.loc[index] = [index, LOAD_INFORMATION_STALENESS, PLACEMENT_INFORMATION_STALENESS, job_creation_interval, completed_job.job_type_id, - self.simulation_name, slowdown, response_time] + completed_job.create_time, self.simulation_name, slowdown, response_time] task_index = 0 for job in completed_jobs: @@ -143,9 +143,10 @@ def produce_time_breakdown_results(self, completed_jobs): assert model_fetching_time >= 0 assert execution_time >= 0 - dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, time_to_buffer, - dependency_wait_time, time_spent_in_queue, model_fetching_time, execution_time] + dataframe_tasks_log.loc[task_index] = [job.job_type_id, task.task_id, task.log.task_arrival_at_worker_buffer_timestamp, + task.log.task_execution_start_timestamp,time_to_buffer, dependency_wait_time, + time_spent_in_queue, model_fetching_time, execution_time] task_index += 1 self.tasks_logging_times = dataframe_tasks_log - self.result_to_export = dataframe \ No newline at end of file + self.result_to_export = dataframe diff --git a/experiments/parse_results.py b/experiments/parse_results.py index ee2ecfe..3c37dc8 100644 --- a/experiments/parse_results.py +++ b/experiments/parse_results.py @@ -155,4 +155,4 @@ def gen_stats(job_df, event_df): plot_batch_size_bar_chart(events_df, out_path) plot_batch_size_vs_batch_start(events_df, out_path) -plot_response_time_vs_arrival_time(job_df, out_path) \ No newline at end of file +plot_response_time_vs_arrival_time(job_df, out_path) diff --git a/experiments/run_experiments.py b/experiments/run_experiments.py index a14bf70..26c1c5d 100644 --- a/experiments/run_experiments.py +++ b/experiments/run_experiments.py @@ -56,10 +56,14 @@ event_log.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "events_by_time.csv") # result_to_export = sim.result_to_export + result_to_export = sim.result_to_export + result_to_export.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "job_breakdown.csv") + tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["centralheft"] + "loadDelay_" + str( LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv") + if "hashtask" in experiment_schedulers: OUTPUT_FILENAME = "hashtask" sim = Simulation_central(simulation_name="hashtask", job_split="PER_TASK", @@ -92,6 +96,9 @@ event_log = sim.event_log event_log.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "events_by_time.csv") + result_to_export = sim.result_to_export + result_to_export.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "job_breakdown.csv") + tasks_logging_times = sim.tasks_logging_times tasks_logging_times.to_csv(OUTPUT_FILE_NAMES["decentralheft"] + "loadDelay_" + str( LOAD_INFORMATION_STALENESS) + "_placementDelay_" + str(PLACEMENT_INFORMATION_STALENESS) + ".csv") From e5b5516ae6e3c91867ab272c89a477204dfdaa14 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 13:35:01 -0400 Subject: [PATCH 30/41] plot model loading --- experiments/parse_results.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/experiments/parse_results.py b/experiments/parse_results.py index 3c37dc8..dbd484f 100644 --- a/experiments/parse_results.py +++ b/experiments/parse_results.py @@ -144,6 +144,18 @@ def gen_stats(job_df, event_df): # print(f"TPUT: {len(job_df) / event_df.loc[len(events_df)-1]["time"]}") +def plot_model_loading_histogram(model_df, out_path): + fig = plt.figure(figsize=(8, 6)) + + plt.hist(model_df["start_time"], bins=15, edgecolor='black') + + plt.xlabel("Time") + plt.ylabel("Number of models loaded") + plt.title(f"Model Loading Over Time") + + plt.savefig(os.path.join(out_path, f"model_loading_hist.png")) + + results_dir_path = sys.argv[1] # results/ out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results" @@ -152,7 +164,9 @@ def gen_stats(job_df, event_df): job_df = pd.read_csv(os.path.join(results_dir_path, "job_breakdown.csv")) # task_df = pd.read_csv(os.path.join(results_dir_path, "loadDelay_1_placementDelay_1.csv")) events_df = pd.read_csv(os.path.join(results_dir_path, 'events_by_time.csv')) +model_df = pd.read_csv(os.path.join(results_dir_path, "model_history_log.csv")) +plot_model_loading_histogram(model_df, out_path) plot_batch_size_bar_chart(events_df, out_path) plot_batch_size_vs_batch_start(events_df, out_path) plot_response_time_vs_arrival_time(job_df, out_path) From 8547b8cb874c3ef9f4fd24b03208f53e0401f42c Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 14:27:04 -0400 Subject: [PATCH 31/41] fetch fix and policy choice --- workers/worker.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/workers/worker.py b/workers/worker.py index 7260498..6182f08 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -106,11 +106,9 @@ def fetch_model(self, model, current_time): 1. model_history on worker 2. cache_history on metadata_service """ - if model is None: - return 0 - # First check if the model is stored locally: either on GPU, or systemRAM(home node) - # case1: if it is in local GPU already - if self.does_have_model(model, current_time): + # check if exists a copy of the model not currently in use + if model is None or \ + self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: return 0 fetch_time = 0 @@ -153,10 +151,12 @@ def _evict_models_from_GPU(self, models_to_evict, current_time): } return eviction_duration + LOOKAHEAD_EVICTION = 0 + FCFS_EVICTION = 1 - def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int) -> float: + def evict_models_from_GPU_until(self, current_time: float, min_required_memory: int, policy: int) -> float: """ - Evicts models from GPU according to lookahead eviction policy until at least + Evicts models from GPU according to FCFS or lookahead eviction policy until at least min_required_memory space is available. Returns time taken to execute model evictions. 0 if min_required_memory could not be created. Assumes batches run in first task arrival order. @@ -167,15 +167,16 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time) models_in_GPU = self.get_model_history(current_time, info_staleness=0) - next_models = self.get_next_models(3, current_time) - models_in_GPU_sorted = sorted( - models_in_GPU, - key=lambda m: next_models.index(m) if m in next_models else len(next_models), - reverse=True - ) + if policy == self.LOOKAHEAD_EVICTION: + next_models = self.get_next_models(3, current_time) + models_in_GPU = sorted( + models_in_GPU, + key=lambda m: next_models.index(m) if m in next_models else len(next_models), + reverse=True + ) models_to_evict = [] - for model in models_in_GPU_sorted: + for model in models_in_GPU: if model not in self.models_in_use: curr_memory -= model.model_size models_to_evict.append(model) From 50fe4765205b8a12b4e52f05d3bc4d962ce48120 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 14:27:25 -0400 Subject: [PATCH 32/41] policy choice --- workers/taskworker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workers/taskworker.py b/workers/taskworker.py index 96db516..6ef5d4e 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -21,7 +21,7 @@ def add_task(self, current_time, task): Add task into the local task queue """ - print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") + # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) @@ -136,7 +136,7 @@ def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> lis can_run = self.can_run_task(current_time, task_queue[0].model) if can_run == self._CAN_RUN_ON_LOAD: current_time += self.evict_models_from_GPU_until( - current_time, task_queue[0].model.model_size) + current_time, task_queue[0].model.model_size, self.LOOKAHEAD_EVICTION) if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD: queued_tasks = queue.Queue() From adf3c7016b36cf72353dac5f488cebe182a99c39 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Thu, 12 Jun 2025 15:23:00 -0400 Subject: [PATCH 33/41] fixed loading plot; added eviction plot --- experiments/parse_results.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/experiments/parse_results.py b/experiments/parse_results.py index dbd484f..e2923a3 100644 --- a/experiments/parse_results.py +++ b/experiments/parse_results.py @@ -147,7 +147,7 @@ def gen_stats(job_df, event_df): def plot_model_loading_histogram(model_df, out_path): fig = plt.figure(figsize=(8, 6)) - plt.hist(model_df["start_time"], bins=15, edgecolor='black') + plt.hist(model_df[model_df["placed_or_evicted"] == "placed"]["start_time"], bins=15, edgecolor='black') plt.xlabel("Time") plt.ylabel("Number of models loaded") @@ -156,6 +156,18 @@ def plot_model_loading_histogram(model_df, out_path): plt.savefig(os.path.join(out_path, f"model_loading_hist.png")) +def plot_model_eviction_histogram(model_df, out_path): + fig = plt.figure(figsize=(8, 6)) + + plt.hist(model_df[model_df["placed_or_evicted"] == "evicted"]["start_time"], bins=15, edgecolor='black') + + plt.xlabel("Time") + plt.ylabel("Number of models evicted") + plt.title(f"Model Eviction Over Time") + + plt.savefig(os.path.join(out_path, f"model_eviction_hist.png")) + + results_dir_path = sys.argv[1] # results/ out_path = sys.argv[2] if len(sys.argv) > 2 else "parsed_results" @@ -167,6 +179,7 @@ def plot_model_loading_histogram(model_df, out_path): model_df = pd.read_csv(os.path.join(results_dir_path, "model_history_log.csv")) plot_model_loading_histogram(model_df, out_path) +plot_model_eviction_histogram(model_df, out_path) plot_batch_size_bar_chart(events_df, out_path) plot_batch_size_vs_batch_start(events_df, out_path) plot_response_time_vs_arrival_time(job_df, out_path) From 70c048991da31c68d483cfac33db2e58276766f8 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 10:47:52 -0400 Subject: [PATCH 34/41] gpu state refactor for accurate allocation handling --- workers/model_state.py | 192 +++++++++++++++++++++++++++++++++++++++++ workers/taskworker.py | 141 ++++++++++++++++-------------- workers/worker.py | 192 ++++++++--------------------------------- 3 files changed, 304 insertions(+), 221 deletions(-) create mode 100644 workers/model_state.py diff --git a/workers/model_state.py b/workers/model_state.py new file mode 100644 index 0000000..d6acefe --- /dev/null +++ b/workers/model_state.py @@ -0,0 +1,192 @@ +import copy + +from core.config import * +from core.model import Model + + +class ModelState: + PLACED = 0 + IN_FETCH = 1 + IN_EVICT = 2 + + def __init__(self, model: Model, state: int, is_reserved_for_batch=True): + assert(state in [self.PLACED, self.IN_FETCH, self.IN_EVICT]) + self.model = model + self.state = state + self.is_reserved_for_batch = is_reserved_for_batch + + def __eq__(self, value): + return type(value) == ModelState and self.model == value.model and self.state == value.state + + def __str__(self): + return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id}>" + + def __repr__(self): + return self.__str__() + + def _state_to_str(self): + if self.state == self.PLACED: return "Placed" + elif self.state == self.IN_FETCH: return "Fetching" + elif self.state == self.IN_EVICT: return "Evicting" + + +class GPUState(object): + def __init__(self): + # sorted (asc) list of GPU states [(time, [model states])] + self._model_states = [] + + def reserved_memory(self, time: float) -> float: + """ + Returns the total GPU memory that is currently in use, either + for currently placed models, models that are being fetched, or + models that are being evicted. + """ + return sum(state.model.model_size for state in self.state_at(time)) + + def available_memory(self, time: float) -> float: + """ + Returns total GPU memory not occupied by a model, reserved for + a model being fetched, or used by a model being evicted. + """ + return GPU_MEMORY_SIZE - self.reserved_memory(time) + + def can_fetch_model(self, model: Model, time: float) -> bool: + """ + Returns True if a new copy of [model] can be fetched to the + GPU as is with no evictions. + """ + return self.available_memory(time) >= model.model_size + + def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool: + """ + Return True if a new copy of [model] can be fetched to the GPU + upon evicting some number of placed models not in use. + """ + # cannot use space occupied by models currently being fetched/evicted or used + return (self.available_memory(time) + \ + sum(state.model.model_size for state in self.state_at(time) + if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size + + def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify): + """ + Internal helper to update states at exactly [marker_time] with + [at_marker_modify: (time, old_states) -> new_states] and states + after [marker_time] with + [post_marker_modify: (time, old_states) -> new_states]. + """ + did_add_marker = False + for i in range(len(self._model_states)-1, -1, -1): + (timestamp, states) = self._model_states[i] + if timestamp == marker_time: + at_marker_modify(timestamp, states) + did_add_marker = True + elif timestamp < marker_time: + if not did_add_marker: + state_copy = copy.deepcopy(states) + at_marker_modify(timestamp, state_copy) + self._model_states.insert(i+1, (marker_time, state_copy)) + did_add_marker = True + return + else: + post_marker_modify(timestamp, states) + + if not did_add_marker: + states = [] + at_marker_modify(marker_time, states) + self._model_states.insert(0, (marker_time, states)) + + def fetch_model(self, model: Model, start_time: float, fetch_time: float): + """ + Fetches a new copy of [model] to the GPU if there is enough available + memory without additional evictions. + """ + assert(model != None) + assert(self.can_fetch_model(model, start_time)) + + fetch_end_time = start_time + fetch_time + + if len(self._model_states) == 0: + # mark when fetch begins and ends + self._model_states.append((start_time, [ModelState(model, ModelState.IN_FETCH)])) + self._model_states.append((fetch_end_time, [ModelState(model, ModelState.PLACED)])) + return + + # add fetch end marker + self._insert_state_marker(fetch_end_time, + lambda _, states: states.append(ModelState(model, ModelState.PLACED)), + lambda _, states: states.append(ModelState(model, ModelState.PLACED))) + + # add fetch start marker + self._insert_state_marker(start_time, + lambda _, states: states.append(ModelState(model, ModelState.IN_FETCH)), + lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None) + + + def evict_model(self, model: Model, start_time: float, evict_time: float): + assert(model in self.placed_models(start_time)) + + eviction_end_time = start_time + evict_time + + # remove model from all later timestamps + def _remove_model(timestamp, states): + for state in states: + if state.state == ModelState.PLACED and state.model == model: + states.remove(state) + return + assert(False) + + self._insert_state_marker(eviction_end_time, _remove_model, _remove_model) + + def _begin_model_eviction(timestamp, states): + for state in states: + if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch: + state.state = ModelState.IN_EVICT + return + assert(False) # should not happen: no model exists to evict + + # add eviction start marker + self._insert_state_marker(start_time, _begin_model_eviction, + lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None) + + + def state_at(self, time: float) -> list[ModelState]: + for (timestamp, states) in self._model_states[::-1]: + if timestamp <= time: + return states + return [] + + def placed_models(self, time: float) -> list[Model]: + return [state.model for state in self.state_at(time) if state.state == ModelState.PLACED] + + def placed_model_states(self, time: float) -> list[ModelState]: + states = self.state_at(time) + if len(states) == 0: + return [] + return [state for state in states if state.state == ModelState.PLACED] + + def does_have_idle_copy(self, model: Model, time: float) -> bool: + return any(state.model == model and not state.is_reserved_for_batch for state in self.placed_model_states(time)) + + def reserve_idle_copy(self, model: Model, time: float): + assert(self.does_have_idle_copy(model, time)) + + def _occupy_one_copy(timestamp, states): + for j, state in enumerate(states): + if state.model == model and \ + state.state == ModelState.PLACED and \ + not state.is_reserved_for_batch: + states[j].is_reserved_for_batch = True + return + assert(False) # should not reach! (no idle copies) + + # reserve 1 idle copy from start to exec end + self._insert_state_marker(time, _occupy_one_copy, _occupy_one_copy) + + def release_busy_copy(self, model: Model, time: float): + def _release_one_copy(timestamp, states): + for i, state in enumerate(states): + if state.model == model and state.state == ModelState.PLACED and state.is_reserved_for_batch: + states[i].is_reserved_for_batch = False + return + + self._insert_state_marker(time, _release_one_copy, _release_one_copy) \ No newline at end of file diff --git a/workers/taskworker.py b/workers/taskworker.py index 6ef5d4e..24a02a6 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -14,19 +14,21 @@ def __init__(self, simulation, num_free_slots, worker_id): # keep track of the queue information at time: [ (time1,[task0,task1,]), (time2,[task1,...]),...] self.queue_history = {} self.involved = False - self.next_check_times = {} + self.max_wait_times = {} def add_task(self, current_time, task): """ Add task into the local task queue """ - - # print(f"[{current_time}] W{self.worker_id}: T{task.task_type} arrived") - # Update when the task is sent to the worker assert (task.log.task_placed_on_worker_queue_timestamp <= current_time) self.add_task_to_queue_history(task, current_time) # Update when the task is sent to the worker - return self.maybe_start_task_for_type(current_time, task.task_type, task.max_wait_time) + + # Initialize max wait time + if task.task_type not in self.max_wait_times or self.max_wait_times[task.task_type] < 0: + self.max_wait_times[task.task_type] = current_time + task.max_wait_time + + return self.maybe_start_batch(current_time, task.task_type) def get_next_models(self, lookahead_count: int, current_time: float, info_staleness=0): if lookahead_count <= 0: @@ -43,13 +45,17 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen return next_models - def free_slot(self, current_time, model): - """ Frees a slot on the worker and attempts to launch another task in that slot. """ - # self.num_free_slots += 1 + def free_slot(self, current_time, model, task_type): + """ Attempts to launch another task. """ if model != None: - self.models_in_use.remove(model) + self.GPU_state.release_busy_copy(model, current_time) + + get_task_events = [] + task_types, task_queues = self.get_sorted_task_types(current_time) + for task_type in task_types: + batch_end_events = self._maybe_start_batch(task_queues[task_type], current_time) + get_task_events += batch_end_events - get_task_events = self.maybe_start_task_any(current_time) return get_task_events # --------------------------- DECENTRALIZED WORKER SCHEDULING ---------------------- @@ -86,25 +92,26 @@ def schedule_job_heft(self, current_time, job): # --------------------------- TASK EXECUTION ---------------------- _CAN_RUN_NOW = 0 - _CAN_RUN_ON_LOAD = 1 + _CAN_RUN_ON_EVICT = 1 _CANNOT_RUN = 2 def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> int: """ Returns _CAN_RUN_NOW if model None, or model is on GPU and not currently in use. - Returns _CAN_RUN_ON_LOAD if model can be loaded onto the GPU (either by evicting - existing models not in use or simply using available memory). + Returns _CAN_RUN_ON_EVICT if model can be loaded onto the GPU upon evicting + unused models. Returns _CANNOT_RUN otherwise. """ - if model == None: # doesn't use GPU + if model == None or self.GPU_state.does_have_idle_copy(model, current_time): + return self._CAN_RUN_NOW + + if self.GPU_state.can_fetch_model(model, current_time): return self._CAN_RUN_NOW - # has >= 1 copies of model in memory that are not currently in use - elif self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: - return self._CAN_RUN_NOW - elif self.can_fit(model.model_size, current_time, info_staleness): - return self._CAN_RUN_ON_LOAD - else: - return self._CANNOT_RUN + + if self.GPU_state.can_fetch_model_on_eviction(model, current_time): + return self._CAN_RUN_ON_EVICT + + return self._CANNOT_RUN def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tuple[int, int]], dict[tuple[int, int], list[Task]]]: """ @@ -113,37 +120,44 @@ def get_sorted_task_types(self, current_time, info_staleness=0) -> tuple[list[tu at index 0 is the next to be executed when a slot opens up on the worker) in addition to a map of all task_types to their task queues. """ - task_types = self.queue_history.keys() task_queues = { task_type: self.get_queue_history(current_time, task_type, info_staleness) for task_type in task_types } - - task_types_by_arrival = sorted( - filter(lambda task_type: len(task_queues[task_type]) > 0, task_types), - key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp, - ) - return task_types_by_arrival, task_queues + types_to_preempt = sorted(filter( + lambda task_type: len(task_queues[task_type]) > 0 and \ + self.max_wait_times[task_type] >= 0 and self.max_wait_times[task_type] <= current_time, task_types), + key=lambda task_type: self.max_wait_times[task_type]) + types_by_arrival = sorted(filter(lambda task_type: len(task_queues[task_type]) > 0 and \ + self.max_wait_times[task_type] > current_time, task_types), + key=lambda task_type: task_queues[task_type][0].log.task_placed_on_worker_queue_timestamp) + + return (types_to_preempt + types_by_arrival), task_queues def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> list[EventOrders]: + """ + Attempts to start a batch drawn from [task_queue]. If there is not + enough GPU memory or the [task_queue] is empty, does nothing. If a + batch is started, updates task type's next wake up to max_wait_time + + earliest remaining task's arrival. + """ # only wake up if existing tasks to avoid congestion since # empty queue will wake up on next task enqueue if len(task_queue) == 0: return [] + batch = [] batch_end_events = [] - latest_time = current_time - + can_run = self.can_run_task(current_time, task_queue[0].model) - if can_run == self._CAN_RUN_ON_LOAD: + if can_run == self._CAN_RUN_ON_EVICT: current_time += self.evict_models_from_GPU_until( current_time, task_queue[0].model.model_size, self.LOOKAHEAD_EVICTION) - if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_LOAD: + if can_run == self._CAN_RUN_NOW or can_run == self._CAN_RUN_ON_EVICT: queued_tasks = queue.Queue() [queued_tasks.put(task) for task in task_queue] # form largest batch < max_batch_size possible - batch = [] while (not queued_tasks.empty()) and len(batch) < task_queue[0].max_batch_size: task = queued_tasks.get() if (current_time >= task.log.task_placed_on_worker_queue_timestamp): @@ -153,49 +167,52 @@ def _maybe_start_batch(self, task_queue: list[Task], current_time: float) -> lis batch_end_events, task_end_time = self.batch_execute(batch, current_time) for task in batch: # rm all tasks in batch self.rm_task_in_queue_history(task, current_time) - latest_time = task_end_time - - # track next wake up time so old wake ups can be skipped - next_check_time = latest_time + task_queue[0].max_wait_time - self.next_check_times[task_queue[0].task_type] = next_check_time - - # if idle, check again in wait time - # NOTE: for some reason, appending to task_end_events does not always - # lead to event being enqueued; thus we enqueue directly to sim queue here - self.simulation.event_queue.put(EventOrders( - next_check_time, - WorkerWakeUpEvent(self, - task_queue[0].task_type, - task_queue[0].max_wait_time))) - + + # if successfully launched batch, reset max wait time + if not queued_tasks.empty(): + earliest_remaining_arrival = -1 + while not queued_tasks.empty(): + task = queued_tasks.get() + if earliest_remaining_arrival < 0 or \ + task.log.task_placed_on_worker_queue_timestamp < earliest_remaining_arrival: + earliest_remaining_arrival = task.log.task_placed_on_worker_queue_timestamp + self.max_wait_times[batch[0].task_type] = earliest_remaining_arrival + batch[0].max_wait_time + else: + self.max_wait_times[batch[0].task_type] = -1 + return batch_end_events - def maybe_start_task_any(self, current_time): - all_end_events = [] - task_types, task_queues = self.get_sorted_task_types(current_time) - for task_type in task_types: - all_end_events += self._maybe_start_batch(task_queues[task_type], current_time) - return all_end_events - - def maybe_start_task_for_type(self, current_time, task_type, task_wait_time) -> tuple[bool, list]: + def maybe_start_batch(self, current_time: float, task_type: tuple[int, int]): + """ + Attempts to launch a batch of [task_type]. Does nothing if there are no + tasks of [task_type] queued. + """ task_queue = self.get_queue_history(current_time, task_type, info_staleness=0) return self._maybe_start_batch(task_queue, current_time) def batch_execute(self, tasks, current_time): + """ + Fetches a new copy or reserves an idle copy of any required GPU models + and executes the batch [tasks]. Returns a list containing the + BatchEndEvent and the batch execution end time. + """ assert(len(tasks) > 0) # cannot launch empty batch self.involved = True - if tasks[0].model != None: - self.models_in_use.append(tasks[0].model) - model_fetch_time = self.fetch_model(tasks[0].model, current_time) - batch_index = 0 for i, batch_size in enumerate(sorted(tasks[0].batch_sizes)): if len(tasks) <= batch_size: # choose smallest batch size > len(tasks) batch_index = i break + model_fetch_time = 0 + if tasks[0].model != None: + if self.GPU_state.does_have_idle_copy(tasks[0].model, current_time): + self.GPU_state.reserve_idle_copy(tasks[0].model, current_time) + else: + model_fetch_time = self.fetch_model(tasks[0].model, current_time) + task_end_time = current_time + model_fetch_time + tasks[0].batch_exec_time[batch_index] task_end_events = [] @@ -227,13 +244,9 @@ def batch_execute(self, tasks, current_time): "job_ids": job_ids } - task_end_events.append(EventOrders(current_time + model_fetch_time, BatchStartEvent( - self, job_ids=job_ids, task_type=tasks[0].task_type - ))) task_end_events.append(EventOrders(task_end_time, BatchEndEvent( self, tasks[0].model, job_ids=job_ids, task_type=tasks[0].task_type ))) - return task_end_events, task_end_time # --------------------------- Subsequent TASK Transfer -------------------- diff --git a/workers/worker.py b/workers/worker.py index 6182f08..685b183 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -5,6 +5,8 @@ import pandas as pd +from workers.model_state import * + class Worker(object): """ Abstract class representing workers. """ @@ -15,14 +17,12 @@ def __init__(self, simulation, num_free_slots, worker_id): self.num_free_slots = num_free_slots self.current_batch = [] # track the currently executing batch (if any) self.GPU_memory_models = [] - # Keep track of the list of models sitting in GPU memory at time: - # {time-> list of model objects} : [ (time1,[model0,model1,]), (time2,[model1,...]),...] - self.GPU_memory_models_history = [] - self.models_in_use = [] # models in use by a currently executing batch + self.GPU_state = GPUState() self.model_history_log = pd.DataFrame(columns=["start_time", "end_time", "model_id", "placed_or_evicted"]) + def __hash__(self): return hash(self.worker_id) @@ -51,69 +51,18 @@ def initial_model_placement(self, model): return 1 return 1 - def used_GPUmemory(self, current_time, info_staleness=0, requiring_worker_id=None) -> int: - """ - Helper function for local GPU memory usage check - """ - if requiring_worker_id == self.worker_id: - info_staleness = 0 - models = self.get_model_history(current_time, info_staleness) - return sum(m.model_size for m in models) - # ---------- LOCAL MEMORY MANAGEMENT AND RETRIEVE ----------""" - def does_have_model(self, model, current_time: float, info_staleness=0) -> bool: - w_models = self.get_model_history(current_time, info_staleness) - return model in w_models - - def copies_in_memory(self, model, current_time: float, info_staleness=0) -> int: - w_models = self.get_model_history(current_time, info_staleness) - return w_models.count(model) - - def can_fit(self, min_required_memory: int, current_time: float, info_staleness=0) -> bool: - # models currently being fetched = models in use - models loaded on GPU - loaded_models = self.get_model_history(current_time, info_staleness) - fetching_models = [] - for model in self.models_in_use: - if model in loaded_models: - loaded_models.remove(model) - else: - fetching_models.append(model) - - # loaded models + models currently being fetched - used_memory = self.used_GPUmemory(current_time, info_staleness=info_staleness) + \ - sum([model.model_size for model in fetching_models]) - - # if currently available memory >= min_required_memory - if GPU_MEMORY_SIZE - used_memory >= min_required_memory: - return True - - # if no batches/current batches do not use GPU - if self.models_in_use == [] and min_required_memory <= GPU_MEMORY_SIZE: - return True - - # if evicting all except current required models & models being fetched can make enough space - if GPU_MEMORY_SIZE - sum(map(lambda m: m.model_size, self.models_in_use)) - \ - sum(map(lambda m: m.model_size, fetching_models)) >= min_required_memory: - return True - - return False - def fetch_model(self, model, current_time): - """ - Return: model transfer time required to execute the Task - Every "task" requires one "model" to be executed correctly - add this information to 2 histories: - 1. model_history on worker - 2. cache_history on metadata_service - """ - # check if exists a copy of the model not currently in use - if model is None or \ - self.copies_in_memory(model, current_time) - self.models_in_use.count(model) > 0: + if model == None or self.GPU_state.does_have_idle_copy(model, current_time): return 0 fetch_time = 0 fetch_time = SameMachineCPUtoGPU_delay(model.model_size) + self.simulation.metadata_service.add_model_cached_location( + model, self.worker_id, current_time + fetch_time) + self.GPU_state.fetch_model(model, current_time, fetch_time) + self.model_history_log.loc[len(self.model_history_log)] = { "start_time": current_time, "end_time": current_time + fetch_time, @@ -121,9 +70,6 @@ def fetch_model(self, model, current_time): "placed_or_evicted": "placed" } - self.simulation.metadata_service.add_model_cached_location( - model, self.worker_id, current_time + fetch_time) - self.add_model_to_memory_history(model, current_time + fetch_time) return fetch_time # NOTE: REQUIRED OVERRIDE @@ -135,20 +81,22 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen return [] def _evict_models_from_GPU(self, models_to_evict, current_time): + # NOTE: Assumes any number of models can be evicted concurrently! eviction_duration = 0 for model in models_to_evict: - if model not in self.models_in_use: - self.simulation.metadata_service.rm_model_cached_location( - model, self.worker_id, current_time) - self.rm_model_in_memory_history(model, current_time) - eviction_duration += SameMachineGPUtoCPU_delay(model.model_size) - - self.model_history_log.loc[len(self.model_history_log)] = { - "start_time": current_time, - "end_time": current_time + eviction_duration, - "model_id": model.model_id, - "placed_or_evicted": "evicted" - } + self.simulation.metadata_service.rm_model_cached_location( + model, self.worker_id, current_time) + + evict_time = SameMachineGPUtoCPU_delay(model.model_size) + self.GPU_state.evict_model(model, current_time, evict_time) + eviction_duration = max(evict_time, eviction_duration) + + self.model_history_log.loc[len(self.model_history_log)] = { + "start_time": current_time, + "end_time": current_time + eviction_duration, + "model_id": model.model_id, + "placed_or_evicted": "evicted" + } return eviction_duration LOOKAHEAD_EVICTION = 0 @@ -159,93 +107,30 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: Evicts models from GPU according to FCFS or lookahead eviction policy until at least min_required_memory space is available. Returns time taken to execute model evictions. 0 if min_required_memory could not be created. - Assumes batches run in first task arrival order. + Assumes batches run in earliest task arrival order. """ - if not self.can_fit(min_required_memory, current_time): - return 0 - - curr_memory = GPU_MEMORY_SIZE - self.used_GPUmemory(current_time) + curr_memory = self.GPU_state.available_memory(current_time) - models_in_GPU = self.get_model_history(current_time, info_staleness=0) + placed_model_states = self.GPU_state.placed_model_states(current_time) if policy == self.LOOKAHEAD_EVICTION: next_models = self.get_next_models(3, current_time) - models_in_GPU = sorted( - models_in_GPU, - key=lambda m: next_models.index(m) if m in next_models else len(next_models), + placed_model_states = sorted( + placed_model_states, + key=lambda m: next_models.index(m.model) if m.model in next_models else len(next_models), reverse=True ) models_to_evict = [] - for model in models_in_GPU: - if model not in self.models_in_use: - curr_memory -= model.model_size - models_to_evict.append(model) + for state in placed_model_states: + if not state.is_reserved_for_batch: + curr_memory += state.model.model_size + models_to_evict.append(state.model) if curr_memory >= min_required_memory: return self._evict_models_from_GPU(models_to_evict, current_time) - + return 0 # ------------------------- cached model history update helper functions --------------- - def add_model_to_memory_history(self, model, current_time): - assert (model.model_size <= GPU_MEMORY_SIZE) - last_index = len(self.GPU_memory_models_history) - 1 - # 0. base case - if last_index == -1: - self.GPU_memory_models_history.append((current_time, [model])) - return - # 1. Find the time_stamp place to add this queue information - while last_index >= 0: - if self.GPU_memory_models_history[last_index][0] == current_time: - if model not in self.GPU_memory_models_history[last_index][1]: - self.GPU_memory_models_history[last_index][1].append(model) - break - if self.GPU_memory_models_history[last_index][0] < current_time: - if model not in self.GPU_memory_models_history[last_index][1]: - next_queue = self.GPU_memory_models_history[last_index][1].copy( - ) - next_queue.append(model) - last_index += 1 - self.GPU_memory_models_history.insert( - last_index, (current_time, next_queue) - ) - break - # check the previous entry - last_index -= 1 - # 2. added the worker_id to all the subsequent timestamp tuples - while last_index < len(self.GPU_memory_models_history): - if model not in self.GPU_memory_models_history[last_index][1]: - self.GPU_memory_models_history[last_index][1].append(model) - last_index += 1 - - def rm_model_in_memory_history(self, model, current_time): - last_index = len(self.GPU_memory_models_history) - 1 - # 0. base case: shouldn't happen - if last_index == -1: - AssertionError("rm model cached location to an empty list") - return - # 1. find the place to add this remove_event to the tuple list - while last_index >= 0: - if self.GPU_memory_models_history[last_index][0] == current_time: - if model in self.GPU_memory_models_history[last_index][1]: - self.GPU_memory_models_history[last_index][1].remove(model) - break - if self.GPU_memory_models_history[last_index][0] < current_time: - if model in self.GPU_memory_models_history[last_index][1]: - next_tasks_in_memory = self.GPU_memory_models_history[last_index][1].copy( - ) - next_tasks_in_memory.remove(model) - last_index = last_index + 1 - self.GPU_memory_models_history.insert( - last_index, (current_time, next_tasks_in_memory) - ) - break - last_index -= 1 # go to prev time - # 2. remove the task from all the subsequent tuple - while last_index < len(self.GPU_memory_models_history): - if model in self.GPU_memory_models_history[last_index]: - self.GPU_memory_models_history[last_index][1].remove(model) - last_index += 1 # do this for the remaining element after - def get_history(self, history, current_time, info_staleness) -> list: delayed_time = current_time - info_staleness last_index = len(history) - 1 @@ -253,11 +138,4 @@ def get_history(self, history, current_time, info_staleness) -> list: if history[last_index][0] <= delayed_time: return history[last_index][1].copy() last_index -= 1 # check the previous one - return [] - - def get_model_history(self, current_time, info_staleness=0, requiring_workerid= None) -> list: - if requiring_workerid == self.worker_id: - info_staleness = 0 - return self.get_history(self.GPU_memory_models_history, current_time, info_staleness) - - + return [] \ No newline at end of file From 84e1c0d92d03ec10569907876e1e0bc1302af976 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 10:48:22 -0400 Subject: [PATCH 35/41] cache logic update for gpu state --- schedulers/algo/nav_heft_algo.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/schedulers/algo/nav_heft_algo.py b/schedulers/algo/nav_heft_algo.py index 1f2e2e3..51dac17 100644 --- a/schedulers/algo/nav_heft_algo.py +++ b/schedulers/algo/nav_heft_algo.py @@ -98,9 +98,9 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co available_memory = GPU_MEMORY_SIZE if consider_cache: - available_memory = workers[worker_id].used_GPUmemory(current_time, \ - info_staleness=PLACEMENT_INFORMATION_STALENESS, \ - requiring_worker_id=initial_worker_id) + available_memory = workers[worker_id].GPU_state.available_memory(current_time) + # info_staleness=PLACEMENT_INFORMATION_STALENESS, \ + # requiring_worker_id=initial_worker_id) workers_available_memory[worker_id] = available_memory # Select the best worker for each task based on their ranking from high to low @@ -128,13 +128,12 @@ def nav_heft_job_plan(job, worker_list, current_time, initial_worker_id=None, co model_fetch_time = 0 cur_fetching_model_size = 0 if consider_cache: - models_in_cur_worker = workers[cur_worker_id].get_model_history(current_time, \ - info_staleness=PLACEMENT_INFORMATION_STALENESS, \ - requiring_workerid= initial_worker_id) - if cur_task.model is not None and cur_task.model not in models_in_cur_worker: + # TODO: info staleness + if cur_task.model is not None and \ + not workers[cur_worker_id].GPU_state.does_have_idle_copy(cur_task.model, current_time): model_fetch_time = SameMachineCPUtoGPU_delay(cur_task.model.model_size) cur_fetching_model_size = cur_task.model.model_size - if workers_available_memory[cur_worker_id] + cur_task.model.model_size > GPU_MEMORY_SIZE: + if not workers[cur_worker_id].GPU_state.can_fetch_model(cur_task.model, current_time): # double model fetch time due to the overhead from model_eviction model_fetch_time += model_fetch_time cur_earliest_start_time += model_fetch_time From 86566d111419465a1cd8ff70b2fbcf7fa4b81603 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 10:49:20 -0400 Subject: [PATCH 36/41] remove batch start --- core/events.py | 81 +------------------------------------------------- 1 file changed, 1 insertion(+), 80 deletions(-) diff --git a/core/events.py b/core/events.py index 3bb6202..c815a90 100644 --- a/core/events.py +++ b/core/events.py @@ -150,57 +150,6 @@ def run(self, current_time): def to_string(self): return "[Intermediate Results Arrival]: worker:" + str(self.worker.worker_id) + ", prev_task_id:" + str(self.prev_task.task_id) + ", cur_task_id:" + str(self.cur_task.task_id) - -class TaskEndEvent(Event): - """ Event to signify that a TASK has been performed by the WORKER. """ - - def __init__(self, worker, job_id=-1, task_id=-1): - self.worker = worker - self.job_id = job_id # integer representing the job_id - self.task_id = task_id # integer representing the task_id - - def run(self, current_time): - return self.worker.free_slot(current_time, self) - - def to_string(self): - return "[Task End (Job {} - Task {}) at Worker {}] ===".format(self.job_id, self.task_id, self.worker.worker_id) - - -class BatchStartEvent(Event): - """ - Event to signify that a BATCH has started executing in WORKER. - Only for logging purposes. - """ - - def __init__(self, worker, job_ids=[], task_type=(-1, -1)): - self.worker = worker - self.job_ids = job_ids # list[int] with the job_ids in the batch - self.task_type = task_type # (workflow_id, task_id) identifying the batch task_type - - def run(self, current_time): - return [] - - def to_string(self): - jobs = ",".join([str(id) for id in self.job_ids]) - return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" - - -class BatchEndEvent(Event): - """ Event to signify that a BATCH has been performed by the WORKER. """ - - def __init__(self, worker, job_ids=[], task_type=(-1, -1)): - self.worker = worker - self.job_ids = job_ids # list[int] with the job_ids in the batch - self.task_type = task_type # (workflow_id, task_id) identifying the batch task_type - - def run(self, current_time): - return [] - - def to_string(self): - jobs = ",".join([str(id) for id in self.job_ids]) - return f"[Batch Start (Task {self.task_type}, Jobs {jobs}) at Worker {self.worker.worker_id}]" - - class BatchEndEvent(Event): """ Event to signify that a BATCH has been performed by the WORKER. """ @@ -211,7 +160,7 @@ def __init__(self, worker, model, job_ids=[], task_type=(-1, -1)): self.task_type = task_type # (workflow_id, task_id) def run(self, current_time): - return self.worker.free_slot(current_time, self.model) + return self.worker.free_slot(current_time, self.model, self.task_type) def to_string(self): jobs = ",".join([str(id) for id in self.job_ids]) @@ -251,34 +200,6 @@ def run(self, current_time): def to_string(self): return "[Job End] ===" - -class WorkerWakeUpEvent(Event): - """ - Event to signify that max_wait_time has passed and worker should - check task queue. - """ - - def __init__(self, worker, task_id, task_max_wait_time): - self.worker = worker - self.task_id = task_id - self.task_max_wait_time = task_max_wait_time - - def run(self, current_time): - if self.will_run(current_time): - return self.worker.maybe_start_task_for_type( - current_time, self.task_id, self.task_max_wait_time) - return [] - - def to_string(self): - return f"[Worker (id: {self.worker.worker_id}) Wake Up (task id: {self.task_id})]" - - def will_run(self, current_time): - # skip current wake up if a later wake up has been scheduled - if self.task_id in self.worker.next_check_times: - return current_time >= self.worker.next_check_times[self.task_id] - return True # if no batch has been run yet, wake up should be executed - - class EventOrders: """ Used so that the Simulation keeps track of the priority queue order From 9af0327a43c48632f67edc1b2c65c6d589d8d6f7 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 13:57:46 -0400 Subject: [PATCH 37/41] reserving GPU space for model fetching upon eviction --- workers/model_state.py | 65 ++++++++++++++++++++++++++++++++++-------- workers/worker.py | 46 ++++++++++++++++-------------- 2 files changed, 78 insertions(+), 33 deletions(-) diff --git a/workers/model_state.py b/workers/model_state.py index d6acefe..b0e71ce 100644 --- a/workers/model_state.py +++ b/workers/model_state.py @@ -6,12 +6,13 @@ class ModelState: PLACED = 0 - IN_FETCH = 1 - IN_EVICT = 2 + PRE_FETCH = 1 # reserved for a model that will be fetched + IN_FETCH = 2 + IN_EVICT = 3 - def __init__(self, model: Model, state: int, is_reserved_for_batch=True): - assert(state in [self.PLACED, self.IN_FETCH, self.IN_EVICT]) + def __init__(self, model: Model, state: int, is_reserved_for_batch=True, size=0): self.model = model + self.size = size if size > 0 else model.model_size self.state = state self.is_reserved_for_batch = is_reserved_for_batch @@ -19,7 +20,7 @@ def __eq__(self, value): return type(value) == ModelState and self.model == value.model and self.state == value.state def __str__(self): - return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id}>" + return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id if self.model else -1}>" def __repr__(self): return self.__str__() @@ -28,6 +29,7 @@ def _state_to_str(self): if self.state == self.PLACED: return "Placed" elif self.state == self.IN_FETCH: return "Fetching" elif self.state == self.IN_EVICT: return "Evicting" + elif self.state == self.PRE_FETCH: return "Reserved" class GPUState(object): @@ -38,15 +40,14 @@ def __init__(self): def reserved_memory(self, time: float) -> float: """ Returns the total GPU memory that is currently in use, either - for currently placed models, models that are being fetched, or - models that are being evicted. + for currently placed models, models that are being fetched, + models that are being evicted, or models that will be fetched. """ - return sum(state.model.model_size for state in self.state_at(time)) + return sum(state.size for state in self.state_at(time)) def available_memory(self, time: float) -> float: """ - Returns total GPU memory not occupied by a model, reserved for - a model being fetched, or used by a model being evicted. + Returns total GPU memory that is not reserved (see reserved_memory). """ return GPU_MEMORY_SIZE - self.reserved_memory(time) @@ -64,7 +65,7 @@ def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool: """ # cannot use space occupied by models currently being fetched/evicted or used return (self.available_memory(time) + \ - sum(state.model.model_size for state in self.state_at(time) + sum(state.size for state in self.state_at(time) if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify): @@ -122,7 +123,13 @@ def fetch_model(self, model: Model, start_time: float, fetch_time: float): lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None) - def evict_model(self, model: Model, start_time: float, evict_time: float): + def evict_model(self, model: Model, start_time: float, evict_time: float, reserve_until=-1): + """ + Evicts [model] starting at [start_time] in [evict_time] time. + Reserves evicted space until [reserve_until]. This prevents other models + from being loaded in space that may be intended to fetch a specific model. + Does not reserve if [reserve_until] < 0. + """ assert(model in self.placed_models(start_time)) eviction_end_time = start_time + evict_time @@ -147,7 +154,33 @@ def _begin_model_eviction(timestamp, states): # add eviction start marker self._insert_state_marker(start_time, _begin_model_eviction, lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None) + + if reserve_until >= 0: + self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until) + + def reserve_model_space(self, model: Model, size: float, start_time: float, end_time: float): + """ + Reserves [size] extra space for [model] from [start_time] to [end_time]. + Used during evictions when additional space must be reserved in addition + to space from evicted or currently evicting models for when [model] is + fetched. Prevents other models from being fetched in space made for + [model]. + """ + assert(size > 0) + # mark reservation start + self._insert_state_marker(start_time, + lambda _, states: states.append(ModelState(model, ModelState.PRE_FETCH, size=size)), + lambda t, states: states.append(ModelState(model, ModelState.PRE_FETCH, size=size)) if t < end_time else None) + + def _remove_reservation(timestamp, states): + for state in states: + if state.model == model and state.state == ModelState.PRE_FETCH and state.size == size: + states.remove(state) + return + + # mark reservation end + self._insert_state_marker(end_time, _remove_reservation, lambda _, states: None) def state_at(self, time: float) -> list[ModelState]: for (timestamp, states) in self._model_states[::-1]: @@ -168,6 +201,11 @@ def does_have_idle_copy(self, model: Model, time: float) -> bool: return any(state.model == model and not state.is_reserved_for_batch for state in self.placed_model_states(time)) def reserve_idle_copy(self, model: Model, time: float): + """ + If there is an idle copy of [model], reserve it to execute a batch + starting from [time]. When execution finishes, a call to + [release_busy_copy] is required. + """ assert(self.does_have_idle_copy(model, time)) def _occupy_one_copy(timestamp, states): @@ -183,6 +221,9 @@ def _occupy_one_copy(timestamp, states): self._insert_state_marker(time, _occupy_one_copy, _occupy_one_copy) def release_busy_copy(self, model: Model, time: float): + """ + Releases a previously occupied/reserved copy of [model] at [time]. + """ def _release_one_copy(timestamp, states): for i, state in enumerate(states): if state.model == model and state.state == ModelState.PLACED and state.is_reserved_for_batch: diff --git a/workers/worker.py b/workers/worker.py index 685b183..3a5a2ef 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -80,25 +80,6 @@ def get_next_models(self, lookahead_count: int, current_time: float, info_stalen """ return [] - def _evict_models_from_GPU(self, models_to_evict, current_time): - # NOTE: Assumes any number of models can be evicted concurrently! - eviction_duration = 0 - for model in models_to_evict: - self.simulation.metadata_service.rm_model_cached_location( - model, self.worker_id, current_time) - - evict_time = SameMachineGPUtoCPU_delay(model.model_size) - self.GPU_state.evict_model(model, current_time, evict_time) - eviction_duration = max(evict_time, eviction_duration) - - self.model_history_log.loc[len(self.model_history_log)] = { - "start_time": current_time, - "end_time": current_time + eviction_duration, - "model_id": model.model_id, - "placed_or_evicted": "evicted" - } - return eviction_duration - LOOKAHEAD_EVICTION = 0 FCFS_EVICTION = 1 @@ -126,8 +107,31 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: curr_memory += state.model.model_size models_to_evict.append(state.model) if curr_memory >= min_required_memory: - return self._evict_models_from_GPU(models_to_evict, current_time) - + # NOTE: Assumes models can be evicted concurrently + model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict)) + eviction_duration = max(model_evict_times) + full_eviction_end = current_time + eviction_duration + + # must reserve space to prevent other models from loading in space created here + extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict) + if extra_to_reserve > 0: + self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end) + + for i in range(len(models_to_evict)): + self.simulation.metadata_service.rm_model_cached_location( + models_to_evict[i], self.worker_id, current_time) + self.GPU_state.evict_model(models_to_evict[i], + current_time, + model_evict_times[i], + reserve_until=full_eviction_end) + + self.model_history_log.loc[len(self.model_history_log)] = { + "start_time": current_time, + "end_time": current_time + eviction_duration, + "model_id": models_to_evict[i].model_id, + "placed_or_evicted": "evicted" + } + return eviction_duration return 0 # ------------------------- cached model history update helper functions --------------- From 6b79f44d7ad2910e88ef20655321f98e26779e47 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 14:14:41 -0400 Subject: [PATCH 38/41] removed wake up --- schedulers/decentralized/simulation_decentral.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/schedulers/decentralized/simulation_decentral.py b/schedulers/decentralized/simulation_decentral.py index cc80ac5..104c537 100644 --- a/schedulers/decentralized/simulation_decentral.py +++ b/schedulers/decentralized/simulation_decentral.py @@ -51,8 +51,10 @@ def run(self): while self.remaining_jobs > 0: cur_event = self.event_queue.get() - if type(cur_event.event) != WorkerWakeUpEvent or cur_event.event.will_run(cur_event.current_time): - self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] + print(cur_event.to_string()) + print(f"Jobs left: {self.remaining_jobs}") + + self.event_log.loc[len(self.event_log)] = [cur_event.current_time, cur_event.to_string()] assert cur_event.current_time >= last_time last_time = cur_event.current_time @@ -60,6 +62,4 @@ def run(self): for new_event in new_events: last_time = cur_event.current_time self.event_queue.put(new_event) - self.run_finish(last_time, by_job_type=True) - - + self.run_finish(last_time, by_job_type=True) \ No newline at end of file From 0120870259e6c31a75231ad3bec1fd184b556723 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Tue, 17 Jun 2025 14:17:08 -0400 Subject: [PATCH 39/41] to str fix --- workers/model_state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workers/model_state.py b/workers/model_state.py index b0e71ce..c09090f 100644 --- a/workers/model_state.py +++ b/workers/model_state.py @@ -20,7 +20,7 @@ def __eq__(self, value): return type(value) == ModelState and self.model == value.model and self.state == value.state def __str__(self): - return f"<[{self._state_to_str()}] [{"NOT " if not self.is_reserved_for_batch else ""}IN USE] Model ID: {self.model.model_id if self.model else -1}>" + return f"<[{self._state_to_str()}] [{'NOT ' if not self.is_reserved_for_batch else ''}IN USE] Model ID: {self.model.model_id if self.model else -1}>" def __repr__(self): return self.__str__() From 26076cba35f0831173e93cb004a500f5f17fbe2f Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 18 Jun 2025 08:09:58 -0400 Subject: [PATCH 40/41] cannot load duplicates --- workers/taskworker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/workers/taskworker.py b/workers/taskworker.py index 24a02a6..9cc350e 100644 --- a/workers/taskworker.py +++ b/workers/taskworker.py @@ -105,6 +105,10 @@ def can_run_task(self, current_time: float, model: Model, info_staleness=0) -> i if model == None or self.GPU_state.does_have_idle_copy(model, current_time): return self._CAN_RUN_NOW + # cannot load additional copies of the same model + if any(map(lambda s: s.model == model, self.GPU_state.state_at(current_time))): + return self._CANNOT_RUN + if self.GPU_state.can_fetch_model(model, current_time): return self._CAN_RUN_NOW From f268dc226b568cac77ce86617413638b3f776670 Mon Sep 17 00:00:00 2001 From: Tami Takada Date: Wed, 18 Jun 2025 08:11:10 -0400 Subject: [PATCH 41/41] no eviction time --- workers/model_state.py | 30 +++++++++++++++--------------- workers/worker.py | 28 ++++++++++++---------------- 2 files changed, 27 insertions(+), 31 deletions(-) diff --git a/workers/model_state.py b/workers/model_state.py index c09090f..cfdd764 100644 --- a/workers/model_state.py +++ b/workers/model_state.py @@ -65,7 +65,7 @@ def can_fetch_model_on_eviction(self, model: Model, time: float) -> bool: """ # cannot use space occupied by models currently being fetched/evicted or used return (self.available_memory(time) + \ - sum(state.size for state in self.state_at(time) + sum(state.size for state in self.state_at(time) if state.state == ModelState.PLACED and not state.is_reserved_for_batch)) >= model.model_size def _insert_state_marker(self, marker_time: float, at_marker_modify, post_marker_modify): @@ -113,12 +113,12 @@ def fetch_model(self, model: Model, start_time: float, fetch_time: float): return # add fetch end marker - self._insert_state_marker(fetch_end_time, + self._insert_state_marker(fetch_end_time, lambda _, states: states.append(ModelState(model, ModelState.PLACED)), lambda _, states: states.append(ModelState(model, ModelState.PLACED))) # add fetch start marker - self._insert_state_marker(start_time, + self._insert_state_marker(start_time, lambda _, states: states.append(ModelState(model, ModelState.IN_FETCH)), lambda t, states: states.append(ModelState(model, ModelState.IN_FETCH)) if t < fetch_end_time else None) @@ -144,19 +144,19 @@ def _remove_model(timestamp, states): self._insert_state_marker(eviction_end_time, _remove_model, _remove_model) - def _begin_model_eviction(timestamp, states): - for state in states: - if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch: - state.state = ModelState.IN_EVICT - return - assert(False) # should not happen: no model exists to evict + # def _begin_model_eviction(timestamp, states): + # for state in states: + # if state.state == ModelState.PLACED and state.model == model and not state.is_reserved_for_batch: + # state.state = ModelState.IN_EVICT + # return + # assert(False) # should not happen: no model exists to evict # add eviction start marker - self._insert_state_marker(start_time, _begin_model_eviction, - lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None) + # self._insert_state_marker(start_time, _begin_model_eviction, + # lambda t, states: _begin_model_eviction(t, states) if t < eviction_end_time else None) - if reserve_until >= 0: - self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until) + # if reserve_until >= 0: + # self.reserve_model_space(model, model.model_size, eviction_end_time, reserve_until) def reserve_model_space(self, model: Model, size: float, start_time: float, end_time: float): """ @@ -193,7 +193,7 @@ def placed_models(self, time: float) -> list[Model]: def placed_model_states(self, time: float) -> list[ModelState]: states = self.state_at(time) - if len(states) == 0: + if len(states) == 0: return [] return [state for state in states if state.state == ModelState.PLACED] @@ -230,4 +230,4 @@ def _release_one_copy(timestamp, states): states[i].is_reserved_for_batch = False return - self._insert_state_marker(time, _release_one_copy, _release_one_copy) \ No newline at end of file + self._insert_state_marker(time, _release_one_copy, _release_one_copy) diff --git a/workers/worker.py b/workers/worker.py index 3a5a2ef..5c1a9f7 100644 --- a/workers/worker.py +++ b/workers/worker.py @@ -65,7 +65,7 @@ def fetch_model(self, model, current_time): self.model_history_log.loc[len(self.model_history_log)] = { "start_time": current_time, - "end_time": current_time + fetch_time, + "end_time": current_time + fetch_time, "model_id": model.model_id, "placed_or_evicted": "placed" } @@ -96,7 +96,7 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: if policy == self.LOOKAHEAD_EVICTION: next_models = self.get_next_models(3, current_time) placed_model_states = sorted( - placed_model_states, + placed_model_states, key=lambda m: next_models.index(m.model) if m.model in next_models else len(next_models), reverse=True ) @@ -107,31 +107,27 @@ def evict_models_from_GPU_until(self, current_time: float, min_required_memory: curr_memory += state.model.model_size models_to_evict.append(state.model) if curr_memory >= min_required_memory: - # NOTE: Assumes models can be evicted concurrently - model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict)) - eviction_duration = max(model_evict_times) - full_eviction_end = current_time + eviction_duration + # model_evict_times = list(map(lambda m: SameMachineGPUtoCPU_delay(m.model_size), models_to_evict)) + # eviction_duration = max(model_evict_times) + # full_eviction_end = current_time + eviction_duration # must reserve space to prevent other models from loading in space created here - extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict) - if extra_to_reserve > 0: - self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end) + # extra_to_reserve = min_required_memory - sum(m.model_size for m in models_to_evict) + # if extra_to_reserve > 0: + # self.GPU_state.reserve_model_space(None, extra_to_reserve, current_time, full_eviction_end) for i in range(len(models_to_evict)): self.simulation.metadata_service.rm_model_cached_location( models_to_evict[i], self.worker_id, current_time) - self.GPU_state.evict_model(models_to_evict[i], - current_time, - model_evict_times[i], - reserve_until=full_eviction_end) + self.GPU_state.evict_model(models_to_evict[i], current_time, 0) self.model_history_log.loc[len(self.model_history_log)] = { "start_time": current_time, - "end_time": current_time + eviction_duration, + "end_time": current_time , "model_id": models_to_evict[i].model_id, "placed_or_evicted": "evicted" } - return eviction_duration + return 0 return 0 # ------------------------- cached model history update helper functions --------------- @@ -142,4 +138,4 @@ def get_history(self, history, current_time, info_staleness) -> list: if history[last_index][0] <= delayed_time: return history[last_index][1].copy() last_index -= 1 # check the previous one - return [] \ No newline at end of file + return []